[llvm] b1fe7da - [AMDGPU][True16][CodeGen] enable true16 for more codegen test patch 2 (#131210)

via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 14 11:58:00 PDT 2025


Author: Brox Chen
Date: 2025-03-14T14:57:55-04:00
New Revision: b1fe7dabceec3341a2aeb466fd40749ce2c61b13

URL: https://github.com/llvm/llvm-project/commit/b1fe7dabceec3341a2aeb466fd40749ce2c61b13
DIFF: https://github.com/llvm/llvm-project/commit/b1fe7dabceec3341a2aeb466fd40749ce2c61b13.diff

LOG: [AMDGPU][True16][CodeGen] enable true16 for more codegen test patch 2 (#131210)

This is an NFC (no functional change) patch.

Enable true16 mode for more AMDGPU CodeGen tests.

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
    llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
    llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
    llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
    llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
    llvm/test/CodeGen/AMDGPU/select.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
    llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
    llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
    llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
index 867025adca944..644c88457714b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
@@ -3,8 +3,10 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX8 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX12 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX12,GFX12-FAKE16 %s
 
 define amdgpu_kernel void @cos_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX6-LABEL: cos_f16:
@@ -69,31 +71,57 @@ define amdgpu_kernel void @cos_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: cos_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_u16 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cos_f16_e32 v1, v1
-; GFX11-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: cos_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    global_load_d16_b16 v0, v1, s[2:3]
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cos_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
+; GFX11-TRUE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: cos_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v0, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_cos_f16_e32 v1, v1
-; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX11-FAKE16-LABEL: cos_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cos_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-FAKE16-NEXT:    s_endpgm
+;
+; GFX12-TRUE16-LABEL: cos_f16:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v1, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v0.l
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_cos_f16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: cos_f16:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_cos_f16_e32 v1, v1
+; GFX12-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.cos.f16(half %a.val)
   store half %r.val, ptr addrspace(1) %r
@@ -184,42 +212,79 @@ define amdgpu_kernel void @cos_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: cos_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
-; GFX11-NEXT:    v_cos_f16_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cos_f16_e32 v2, v2
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_pack_b32_f16 v1, v1, v2
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: cos_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    global_load_b32 v0, v1, s[2:3]
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.h, 0.15915494, v2.l
+; GFX11-TRUE16-NEXT:    v_cos_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cos_f16_e32 v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: cos_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
+; GFX11-FAKE16-NEXT:    v_cos_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cos_f16_e32 v2, v2
+; GFX11-FAKE16-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v1, v2
+; GFX11-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-FAKE16-NEXT:    s_endpgm
+;
+; GFX12-TRUE16-LABEL: cos_v2f16:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_b32 v2, v1, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v2.l
+; GFX12-TRUE16-NEXT:    v_mul_f16_e32 v0.h, 0.15915494, v2.l
+; GFX12-TRUE16-NEXT:    ; kill: def $vgpr2 killed $vgpr2_lo16 killed $exec
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_cos_f16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT:    v_cos_f16_e32 v0.h, v0.h
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
+; GFX12-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX12-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: cos_v2f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v0, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX12-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
-; GFX12-NEXT:    v_cos_f16_e32 v1, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
-; GFX12-NEXT:    v_cos_f16_e32 v2, v2
-; GFX12-NEXT:    v_pack_b32_f16 v1, v1, v2
-; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-FAKE16-LABEL: cos_v2f16:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX12-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-FAKE16-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
+; GFX12-FAKE16-NEXT:    v_cos_f16_e32 v1, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX12-FAKE16-NEXT:    v_cos_f16_e32 v2, v2
+; GFX12-FAKE16-NEXT:    v_pack_b32_f16 v1, v1, v2
+; GFX12-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.cos.v2f16(<2 x half> %a.val)
   store <2 x half> %r.val, ptr addrspace(1) %r
@@ -228,3 +293,6 @@ define amdgpu_kernel void @cos_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 
 declare half @llvm.cos.f16(half %a)
 declare <2 x half> @llvm.cos.v2f16(<2 x half> %a)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
+; GFX12: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index 814f44477f528..61991c8b409dd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -5,8 +5,10 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=VI-DENORM %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-FLUSH %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-DENORM %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-FLUSH %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-DENORM %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-FLUSH,GFX11-FLUSH-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-FLUSH,GFX11-FLUSH-FAKE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-DENORM,GFX11-DENORM-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-DENORM,GFX11-DENORM-FAKE16 %s
 
 declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
@@ -160,64 +162,122 @@ define amdgpu_kernel void @fmuladd_f16(
 ; GFX10-DENORM-NEXT:    buffer_store_short v2, off, s[0:3], 0
 ; GFX10-DENORM-NEXT:    s_endpgm
 ;
-; GFX11-FLUSH-LABEL: fmuladd_f16:
-; GFX11-FLUSH:       ; %bb.0:
-; GFX11-FLUSH-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-FLUSH-NEXT:    s_mov_b32 s10, -1
-; GFX11-FLUSH-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-FLUSH-NEXT:    s_mov_b32 s14, s10
-; GFX11-FLUSH-NEXT:    s_mov_b32 s15, s11
-; GFX11-FLUSH-NEXT:    s_mov_b32 s18, s10
-; GFX11-FLUSH-NEXT:    s_mov_b32 s19, s11
-; GFX11-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-FLUSH-NEXT:    s_mov_b32 s12, s2
-; GFX11-FLUSH-NEXT:    s_mov_b32 s13, s3
-; GFX11-FLUSH-NEXT:    s_mov_b32 s16, s4
-; GFX11-FLUSH-NEXT:    s_mov_b32 s17, s5
-; GFX11-FLUSH-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
-; GFX11-FLUSH-NEXT:    buffer_load_u16 v1, off, s[16:19], 0
-; GFX11-FLUSH-NEXT:    s_mov_b32 s4, s6
-; GFX11-FLUSH-NEXT:    s_mov_b32 s5, s7
-; GFX11-FLUSH-NEXT:    s_mov_b32 s6, s10
-; GFX11-FLUSH-NEXT:    s_mov_b32 s7, s11
-; GFX11-FLUSH-NEXT:    s_mov_b32 s8, s0
-; GFX11-FLUSH-NEXT:    buffer_load_u16 v2, off, s[4:7], 0
-; GFX11-FLUSH-NEXT:    s_mov_b32 s9, s1
-; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-FLUSH-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FLUSH-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX11-FLUSH-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-FLUSH-NEXT:    s_endpgm
+; GFX11-FLUSH-TRUE16-LABEL: fmuladd_f16:
+; GFX11-FLUSH-TRUE16:       ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s12, s6
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s13, s7
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_load_u16 v2, off, s[12:15], 0
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FLUSH-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v2.l
+; GFX11-FLUSH-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FLUSH-TRUE16-NEXT:    s_endpgm
 ;
-; GFX11-DENORM-LABEL: fmuladd_f16:
-; GFX11-DENORM:       ; %bb.0:
-; GFX11-DENORM-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-DENORM-NEXT:    s_mov_b32 s10, -1
-; GFX11-DENORM-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-DENORM-NEXT:    s_mov_b32 s14, s10
-; GFX11-DENORM-NEXT:    s_mov_b32 s15, s11
-; GFX11-DENORM-NEXT:    s_mov_b32 s18, s10
-; GFX11-DENORM-NEXT:    s_mov_b32 s19, s11
-; GFX11-DENORM-NEXT:    s_mov_b32 s22, s10
-; GFX11-DENORM-NEXT:    s_mov_b32 s23, s11
-; GFX11-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-DENORM-NEXT:    s_mov_b32 s12, s2
-; GFX11-DENORM-NEXT:    s_mov_b32 s13, s3
-; GFX11-DENORM-NEXT:    s_mov_b32 s16, s4
-; GFX11-DENORM-NEXT:    s_mov_b32 s17, s5
-; GFX11-DENORM-NEXT:    s_mov_b32 s20, s6
-; GFX11-DENORM-NEXT:    s_mov_b32 s21, s7
-; GFX11-DENORM-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
-; GFX11-DENORM-NEXT:    buffer_load_u16 v1, off, s[16:19], 0
-; GFX11-DENORM-NEXT:    buffer_load_u16 v2, off, s[20:23], 0
-; GFX11-DENORM-NEXT:    s_mov_b32 s8, s0
-; GFX11-DENORM-NEXT:    s_mov_b32 s9, s1
-; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-DENORM-NEXT:    v_fmac_f16_e32 v2, v0, v1
-; GFX11-DENORM-NEXT:    buffer_store_b16 v2, off, s[8:11], 0
-; GFX11-DENORM-NEXT:    s_endpgm
+; GFX11-FLUSH-FAKE16-LABEL: fmuladd_f16:
+; GFX11-FLUSH-FAKE16:       ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s4, s6
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s5, s7
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_load_u16 v2, off, s[4:7], 0
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FLUSH-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FLUSH-FAKE16-NEXT:    s_endpgm
+;
+; GFX11-DENORM-TRUE16-LABEL: fmuladd_f16:
+; GFX11-DENORM-TRUE16:       ; %bb.0:
+; GFX11-DENORM-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-DENORM-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
+; GFX11-DENORM-TRUE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0
+; GFX11-DENORM-TRUE16-NEXT:    buffer_load_u16 v2, off, s[20:23], 0
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    v_fmac_f16_e32 v2.l, v0.l, v1.l
+; GFX11-DENORM-TRUE16-NEXT:    buffer_store_b16 v2, off, s[8:11], 0
+; GFX11-DENORM-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-DENORM-FAKE16-LABEL: fmuladd_f16:
+; GFX11-DENORM-FAKE16:       ; %bb.0:
+; GFX11-DENORM-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-DENORM-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
+; GFX11-DENORM-FAKE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0
+; GFX11-DENORM-FAKE16-NEXT:    buffer_load_u16 v2, off, s[20:23], 0
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    v_fmac_f16_e32 v2, v0, v1
+; GFX11-DENORM-FAKE16-NEXT:    buffer_store_b16 v2, off, s[8:11], 0
+; GFX11-DENORM-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b,
@@ -353,55 +413,105 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
 ; GFX10-DENORM-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; GFX10-DENORM-NEXT:    s_endpgm
 ;
-; GFX11-FLUSH-LABEL: fmuladd_f16_imm_a:
-; GFX11-FLUSH:       ; %bb.0:
-; GFX11-FLUSH-NEXT:    s_clause 0x1
-; GFX11-FLUSH-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-FLUSH-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-FLUSH-NEXT:    s_mov_b32 s10, -1
-; GFX11-FLUSH-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-FLUSH-NEXT:    s_mov_b32 s14, s10
-; GFX11-FLUSH-NEXT:    s_mov_b32 s15, s11
-; GFX11-FLUSH-NEXT:    s_mov_b32 s6, s10
-; GFX11-FLUSH-NEXT:    s_mov_b32 s7, s11
-; GFX11-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-FLUSH-NEXT:    s_mov_b32 s12, s2
-; GFX11-FLUSH-NEXT:    s_mov_b32 s13, s3
-; GFX11-FLUSH-NEXT:    s_mov_b32 s8, s0
-; GFX11-FLUSH-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-FLUSH-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-FLUSH-NEXT:    s_mov_b32 s9, s1
-; GFX11-FLUSH-NEXT:    v_mul_f16_e32 v0, 0x4200, v0
-; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FLUSH-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-FLUSH-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-FLUSH-NEXT:    s_endpgm
+; GFX11-FLUSH-TRUE16-LABEL: fmuladd_f16_imm_a:
+; GFX11-FLUSH-TRUE16:       ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT:    s_clause 0x1
+; GFX11-FLUSH-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FLUSH-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FLUSH-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0x4200, v0.l
+; GFX11-FLUSH-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FLUSH-TRUE16-NEXT:    s_endpgm
 ;
-; GFX11-DENORM-LABEL: fmuladd_f16_imm_a:
-; GFX11-DENORM:       ; %bb.0:
-; GFX11-DENORM-NEXT:    s_clause 0x1
-; GFX11-DENORM-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-DENORM-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-DENORM-NEXT:    s_mov_b32 s10, -1
-; GFX11-DENORM-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-DENORM-NEXT:    s_mov_b32 s14, s10
-; GFX11-DENORM-NEXT:    s_mov_b32 s15, s11
-; GFX11-DENORM-NEXT:    s_mov_b32 s6, s10
-; GFX11-DENORM-NEXT:    s_mov_b32 s7, s11
-; GFX11-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-DENORM-NEXT:    s_mov_b32 s12, s2
-; GFX11-DENORM-NEXT:    s_mov_b32 s13, s3
-; GFX11-DENORM-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-DENORM-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-DENORM-NEXT:    s_mov_b32 s8, s0
-; GFX11-DENORM-NEXT:    s_mov_b32 s9, s1
-; GFX11-DENORM-NEXT:    v_fmac_f16_e32 v1, 0x4200, v0
-; GFX11-DENORM-NEXT:    buffer_store_b16 v1, off, s[8:11], 0
-; GFX11-DENORM-NEXT:    s_endpgm
+; GFX11-FLUSH-FAKE16-LABEL: fmuladd_f16_imm_a:
+; GFX11-FLUSH-FAKE16:       ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FLUSH-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FLUSH-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FLUSH-FAKE16-NEXT:    v_mul_f16_e32 v0, 0x4200, v0
+; GFX11-FLUSH-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FLUSH-FAKE16-NEXT:    s_endpgm
+;
+; GFX11-DENORM-TRUE16-LABEL: fmuladd_f16_imm_a:
+; GFX11-DENORM-TRUE16:       ; %bb.0:
+; GFX11-DENORM-TRUE16-NEXT:    s_clause 0x1
+; GFX11-DENORM-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-DENORM-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-DENORM-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-DENORM-TRUE16-NEXT:    v_fmac_f16_e32 v1.l, 0x4200, v0.l
+; GFX11-DENORM-TRUE16-NEXT:    buffer_store_b16 v1, off, s[8:11], 0
+; GFX11-DENORM-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-DENORM-FAKE16-LABEL: fmuladd_f16_imm_a:
+; GFX11-DENORM-FAKE16:       ; %bb.0:
+; GFX11-DENORM-FAKE16-NEXT:    s_clause 0x1
+; GFX11-DENORM-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-DENORM-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-DENORM-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-DENORM-FAKE16-NEXT:    v_fmac_f16_e32 v1, 0x4200, v0
+; GFX11-DENORM-FAKE16-NEXT:    buffer_store_b16 v1, off, s[8:11], 0
+; GFX11-DENORM-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %b,
     ptr addrspace(1) %c) {
@@ -535,55 +645,105 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
 ; GFX10-DENORM-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; GFX10-DENORM-NEXT:    s_endpgm
 ;
-; GFX11-FLUSH-LABEL: fmuladd_f16_imm_b:
-; GFX11-FLUSH:       ; %bb.0:
-; GFX11-FLUSH-NEXT:    s_clause 0x1
-; GFX11-FLUSH-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-FLUSH-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-FLUSH-NEXT:    s_mov_b32 s10, -1
-; GFX11-FLUSH-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-FLUSH-NEXT:    s_mov_b32 s14, s10
-; GFX11-FLUSH-NEXT:    s_mov_b32 s15, s11
-; GFX11-FLUSH-NEXT:    s_mov_b32 s6, s10
-; GFX11-FLUSH-NEXT:    s_mov_b32 s7, s11
-; GFX11-FLUSH-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-FLUSH-NEXT:    s_mov_b32 s12, s2
-; GFX11-FLUSH-NEXT:    s_mov_b32 s13, s3
-; GFX11-FLUSH-NEXT:    s_mov_b32 s8, s0
-; GFX11-FLUSH-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-FLUSH-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-FLUSH-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-FLUSH-NEXT:    s_mov_b32 s9, s1
-; GFX11-FLUSH-NEXT:    v_mul_f16_e32 v0, 0x4200, v0
-; GFX11-FLUSH-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-FLUSH-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-FLUSH-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-FLUSH-NEXT:    s_endpgm
+; GFX11-FLUSH-TRUE16-LABEL: fmuladd_f16_imm_b:
+; GFX11-FLUSH-TRUE16:       ; %bb.0:
+; GFX11-FLUSH-TRUE16-NEXT:    s_clause 0x1
+; GFX11-FLUSH-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FLUSH-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FLUSH-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FLUSH-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0x4200, v0.l
+; GFX11-FLUSH-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-FLUSH-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FLUSH-TRUE16-NEXT:    s_endpgm
 ;
-; GFX11-DENORM-LABEL: fmuladd_f16_imm_b:
-; GFX11-DENORM:       ; %bb.0:
-; GFX11-DENORM-NEXT:    s_clause 0x1
-; GFX11-DENORM-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-DENORM-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-DENORM-NEXT:    s_mov_b32 s10, -1
-; GFX11-DENORM-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-DENORM-NEXT:    s_mov_b32 s14, s10
-; GFX11-DENORM-NEXT:    s_mov_b32 s15, s11
-; GFX11-DENORM-NEXT:    s_mov_b32 s6, s10
-; GFX11-DENORM-NEXT:    s_mov_b32 s7, s11
-; GFX11-DENORM-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-DENORM-NEXT:    s_mov_b32 s12, s2
-; GFX11-DENORM-NEXT:    s_mov_b32 s13, s3
-; GFX11-DENORM-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-DENORM-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-DENORM-NEXT:    s_mov_b32 s8, s0
-; GFX11-DENORM-NEXT:    s_mov_b32 s9, s1
-; GFX11-DENORM-NEXT:    v_fmac_f16_e32 v1, 0x4200, v0
-; GFX11-DENORM-NEXT:    buffer_store_b16 v1, off, s[8:11], 0
-; GFX11-DENORM-NEXT:    s_endpgm
+; GFX11-FLUSH-FAKE16-LABEL: fmuladd_f16_imm_b:
+; GFX11-FLUSH-FAKE16:       ; %bb.0:
+; GFX11-FLUSH-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FLUSH-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FLUSH-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FLUSH-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FLUSH-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FLUSH-FAKE16-NEXT:    v_mul_f16_e32 v0, 0x4200, v0
+; GFX11-FLUSH-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FLUSH-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-FLUSH-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FLUSH-FAKE16-NEXT:    s_endpgm
+;
+; GFX11-DENORM-TRUE16-LABEL: fmuladd_f16_imm_b:
+; GFX11-DENORM-TRUE16:       ; %bb.0:
+; GFX11-DENORM-TRUE16-NEXT:    s_clause 0x1
+; GFX11-DENORM-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-DENORM-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-DENORM-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-DENORM-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-DENORM-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-DENORM-TRUE16-NEXT:    v_fmac_f16_e32 v1.l, 0x4200, v0.l
+; GFX11-DENORM-TRUE16-NEXT:    buffer_store_b16 v1, off, s[8:11], 0
+; GFX11-DENORM-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-DENORM-FAKE16-LABEL: fmuladd_f16_imm_b:
+; GFX11-DENORM-FAKE16:       ; %bb.0:
+; GFX11-DENORM-FAKE16-NEXT:    s_clause 0x1
+; GFX11-DENORM-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-DENORM-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-DENORM-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-DENORM-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-DENORM-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-DENORM-FAKE16-NEXT:    v_fmac_f16_e32 v1, 0x4200, v0
+; GFX11-DENORM-FAKE16-NEXT:    buffer_store_b16 v1, off, s[8:11], 0
+; GFX11-DENORM-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %c) {

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index 480d978fa530b..d329a7428115a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -8,8 +8,10 @@
 ; xUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
 ; RUN:  llc -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
 ; xUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
-; RUN:  llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s
-; xUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s
+; RUN:  llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s
+; RUN:  llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s
+; xUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
+; xUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
 
 define amdgpu_kernel void @sgpr_isnan_bf16(ptr addrspace(1) %out, bfloat %x) {
 ; GFX7CHECK-LABEL: sgpr_isnan_bf16:
@@ -203,15 +205,25 @@ define i1 @snan_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: snan_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: snan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: snan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1)  ; 0x001
   ret i1 %1
 }
@@ -253,13 +265,21 @@ define i1 @qnan_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: qnan_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: qnan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: qnan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 2)  ; 0x002
   ret i1 %1
 }
@@ -298,12 +318,19 @@ define i1 @posinf_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: posinf_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: posinf_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: posinf_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 512)  ; 0x200
   ret i1 %1
 }
@@ -342,12 +369,19 @@ define i1 @neginf_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: neginf_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0xff80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: neginf_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0xff80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: neginf_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0xff80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 4)  ; 0x004
   ret i1 %1
 }
@@ -403,16 +437,27 @@ define i1 @posnormal_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: posnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: posnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.h, 0xff80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: posnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v1, 0xff80, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 256)  ; 0x100
   ret i1 %1
 }
@@ -468,16 +513,27 @@ define i1 @negnormal_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.h, 0xff80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v1, 0xff80, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 8)  ; 0x008
   ret i1 %1
 }
@@ -521,13 +577,21 @@ define i1 @possubnormal_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: possubnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, v0, -1
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: possubnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, -1
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: possubnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v0, v0, -1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 128)  ; 0x080
   ret i1 %1
 }
@@ -582,16 +646,27 @@ define i1 @negsubnormal_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negsubnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v1, -1
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v1
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negsubnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.h, v0.h, -1
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negsubnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v1, v1, -1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 16)  ; 0x010
   ret i1 %1
 }
@@ -627,12 +702,19 @@ define i1 @poszero_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: poszero_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: poszero_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: poszero_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 64)  ; 0x040
   ret i1 %1
 }
@@ -671,12 +753,19 @@ define i1 @negzero_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negzero_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negzero_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negzero_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 32)  ; 0x020
   ret i1 %1
 }
@@ -715,12 +804,19 @@ define i1 @posfinite_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: posfinite_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: posfinite_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: posfinite_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 448)  ; 0x1c0
   ret i1 %1
 }
@@ -771,15 +867,25 @@ define i1 @negfinite_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negfinite_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v1
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negfinite_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negfinite_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 56)  ; 0x038
   ret i1 %1
 }
@@ -821,13 +927,21 @@ define i1 @isnan_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnan_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3)  ; nan
   ret i1 %1
 }
@@ -869,13 +983,21 @@ define i1 @not_isnan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isnan_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isnan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isnan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1020)  ; ~nan
   ret i1 %class
 }
@@ -929,16 +1051,27 @@ define <2 x i1> @isnan_v2bf16(<2 x bfloat> %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnan_v2bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX11CHECK-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_v2bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_v2bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call <2 x i1> @llvm.is.fpclass.v2bf16(<2 x bfloat> %x, i32 3)  ; nan
   ret <2 x i1> %1
 }
@@ -1005,19 +1138,33 @@ define <3 x i1> @isnan_v3bf16(<3 x bfloat> %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnan_v3bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX11CHECK-NEXT:    v_and_b32_e32 v3, 0x7fff, v1
-; GFX11CHECK-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v2
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_v3bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff, v1
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v2.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_v3bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v2
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call <3 x i1> @llvm.is.fpclass.v3bf16(<3 x bfloat> %x, i32 3)  ; nan
   ret <3 x i1> %1
 }
@@ -1095,22 +1242,39 @@ define <4 x i1> @isnan_v4bf16(<4 x bfloat> %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnan_v4bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11CHECK-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_v4bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_v4bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff7fff, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call <4 x i1> @llvm.is.fpclass.v4bf16(<4 x bfloat> %x, i32 3)  ; nan
   ret <4 x i1> %1
 }
@@ -1158,13 +1322,21 @@ define i1 @isinf_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isinf_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isinf_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isinf_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516)  ; 0x204 = "inf"
   ret i1 %1
 }
@@ -1206,13 +1378,21 @@ define i1 @isfinite_bf16(bfloat %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isfinite_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isfinite_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isfinite_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 504)  ; 0x1f8 = "finite"
   ret i1 %1
 }
@@ -1252,13 +1432,21 @@ define i1 @issubnormal_or_zero_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: issubnormal_or_zero_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: issubnormal_or_zero_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: issubnormal_or_zero_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 240)  ; 0xf0 = "subnormal|zero"
   ret i1 %class
@@ -1299,13 +1487,21 @@ define i1 @not_issubnormal_or_zero_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_issubnormal_or_zero_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_or_zero_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_or_zero_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
     %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 783)  ; ~0xf0 = "~(subnormal|zero)"
   ret i1 %class
@@ -1353,14 +1549,23 @@ define i1 @isnormal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0xff80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v0, 0xff80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 264)  ; 0x108 = "normal"
   ret i1 %class
 }
@@ -1407,14 +1612,23 @@ define i1 @not_isnormal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0xff80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v0, 0xff80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 759)  ; ~0x108 = "~normal"
   ret i1 %class
 }
@@ -1470,16 +1684,27 @@ define i1 @not_is_plus_normal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_is_plus_normal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
-; GFX11CHECK-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v1
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_is_plus_normal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.h, 0xff80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_is_plus_normal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v1, 0xff80, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 767)  ; ~0x100 = ~"+normal"
   ret i1 %class
 }
@@ -1535,16 +1760,27 @@ define i1 @not_is_neg_normal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_is_neg_normal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
-; GFX11CHECK-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v1
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_is_neg_normal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.h, 0xff80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_is_neg_normal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v1, 0xff80, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_u16_e64 s0, 0x7eff, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1015)  ; ~0x008 = ~"-normal"
   ret i1 %class
 }
@@ -1590,14 +1826,23 @@ define i1 @issubnormal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: issubnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, v0, -1
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: issubnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, -1
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: issubnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v0, v0, -1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 144)  ; 0x90 = "subnormal"
   ret i1 %class
 }
@@ -1643,14 +1888,23 @@ define i1 @not_issubnormal_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_issubnormal_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, v0, -1
-; GFX11CHECK-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7e, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, -1
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7e, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v0, v0, -1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_u16_e32 vcc_lo, 0x7e, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 879)  ; ~0x90 = ~"subnormal"
   ret i1 %class
 }
@@ -1689,13 +1943,21 @@ define i1 @iszero_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 96)  ; 0x60 = "zero"
   ret i1 %class
 }
@@ -1734,13 +1996,21 @@ define i1 @not_iszero_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 927)  ; ~0x60 = ~"zero"
   ret i1 %class
 }
@@ -1779,12 +2049,19 @@ define i1 @ispositive_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: ispositive_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: ispositive_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: ispositive_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 960)  ; fcPositive
   ret i1 %class
 }
@@ -1855,19 +2132,33 @@ define i1 @not_ispositive_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_ispositive_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s1, 0xff80, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v1
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s2, 0x7f80, v1
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, s1
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, s2
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_ispositive_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 0xff80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e64 s2, 0x7f80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, s1
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, s2
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_ispositive_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s1, 0xff80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e64 s2, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, s1
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, s2
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 63)  ; ~fcPositive
   ret i1 %class
 }
@@ -1930,17 +2221,29 @@ define i1 @isnegative_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnegative_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s1, 0xff80, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v1
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, s1
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnegative_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 0xff80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, s1
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnegative_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s1, 0xff80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, s1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 60)  ; fcNegative
   ret i1 %class
 }
@@ -1994,15 +2297,25 @@ define i1 @not_isnegative_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isnegative_bf16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v1
-; GFX11CHECK-NEXT:    s_or_b32 s0, vcc_lo, s0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isnegative_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isnegative_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, vcc_lo, s0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 963)  ; ~fcNegative
   ret i1 %class
 }
@@ -2052,15 +2365,25 @@ define i1 @iszero_or_nan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_nan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99)  ; 0x60|0x3 = "zero|nan"
   ret i1 %0
@@ -2111,15 +2434,25 @@ define i1 @iszero_or_nan_f_daz(bfloat %x) #0 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_nan_f_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99)  ; 0x60|0x3 = "zero|nan"
   ret i1 %0
@@ -2170,15 +2503,25 @@ define i1 @iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_nan_f_maybe_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99)  ; 0x60|0x3 = "zero|nan"
   ret i1 %0
@@ -2229,15 +2572,25 @@ define i1 @not_iszero_or_nan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_nan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
-; GFX11CHECK-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924)  ; ~0x60 = "~(zero|nan)"
   ret i1 %0
@@ -2288,15 +2641,25 @@ define i1 @not_iszero_or_nan_f_daz(bfloat %x) #0 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_nan_f_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
-; GFX11CHECK-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924)  ; ~(0x60|0x3) = "~(zero|nan)"
   ret i1 %0
@@ -2347,15 +2710,25 @@ define i1 @not_iszero_or_nan_f_maybe_daz(bfloat %x) #1 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
-; GFX11CHECK-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_ne_u16_e64 s0, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924)  ; ~(0x60|0x3) = "~(zero|nan)"
   ret i1 %0
@@ -2406,15 +2779,25 @@ define i1 @iszero_or_qnan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_qnan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_qnan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_qnan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s0, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 98)  ; 0x60|0x2 = "zero|qnan"
   ret i1 %0
@@ -2476,17 +2859,29 @@ define i1 @iszero_or_snan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_snan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s1, 0, v0
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    s_or_b32 s0, s1, s0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_snan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s1, s0
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_snan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s1, 0, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s1, s0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 97)  ; 0x60|0x1 = "zero|snan"
   ret i1 %0
@@ -2579,23 +2974,41 @@ define i1 @not_iszero_or_qnan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_qnan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v0, -1
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e64 s1, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v0, 0xff80, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s2, 0x7f, v1
-; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
-; GFX11CHECK-NEXT:    s_or_b32 s1, s2, s1
-; GFX11CHECK-NEXT:    s_or_b32 s0, s1, s0
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_qnan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.h, v0.l, -1
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e64 s1, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0xff80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e64 s2, 0x7f, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s1, s2, s1
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_qnan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v1, v0, -1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e64 s1, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v0, 0xff80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e64 s2, 0x7f, v1
+; GFX11SELDAG-FAKE16-NEXT:    s_and_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s1, s2, s1
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 925)  ; ~(0x60|0x2) = "~(zero|qnan)"
   ret i1 %0
@@ -2680,21 +3093,37 @@ define i1 @not_iszero_or_snan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_snan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v0, -1
-; GFX11CHECK-NEXT:    v_add_nc_u16 v2, 0xff80, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s1, 0x7fbf, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v1
-; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s2, 0x7f00, v2
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, vcc_lo
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, s1
-; GFX11CHECK-NEXT:    s_or_b32 s0, s0, s2
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_snan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v0.h, v0.l, -1
+; GFX11SELDAG-TRUE16-NEXT:    v_add_nc_u16 v1.l, 0xff80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e64 s1, 0x7fbf, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v0.h
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_u16_e64 s2, 0x7f00, v1.l
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, s1
+; GFX11SELDAG-TRUE16-NEXT:    s_or_b32 s0, s0, s2
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_snan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v1, v0, -1
+; GFX11SELDAG-FAKE16-NEXT:    v_add_nc_u16 v2, 0xff80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e64 s1, 0x7fbf, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_u16_e64 s2, 0x7f00, v2
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, s1
+; GFX11SELDAG-FAKE16-NEXT:    s_or_b32 s0, s0, s2
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 926)  ; ~(0x60|0x1) = "~(zero|snan)"
   ret i1 %0
@@ -2737,13 +3166,21 @@ define i1 @isinf_or_nan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isinf_or_nan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f7f, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isinf_or_nan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f7f, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isinf_or_nan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f7f, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 519)  ; 0x204|0x3 = "inf|nan"
   ret i1 %0
@@ -2786,13 +3223,21 @@ define i1 @not_isinf_or_nan_bf16(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isinf_or_nan_bf16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isinf_or_nan_bf16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isinf_or_nan_bf16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 504)  ; ~(0x204|0x3) = "~(inf|nan)"
   ret i1 %0
@@ -2835,13 +3280,21 @@ define i1 @isfinite_or_nan_f(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isfinite_or_nan_f:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isfinite_or_nan_f:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isfinite_or_nan_f:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 507)  ; 0x1f8|0x3 = "finite|nan"
   ret i1 %0
@@ -2884,13 +3337,21 @@ define i1 @not_isfinite_or_nan_f(bfloat %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isfinite_or_nan_f:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isfinite_or_nan_f:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isfinite_or_nan_f:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516)  ; ~(0x1f8|0x3) = "~(finite|nan)"
   ret i1 %0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index 1d869559d9e77..8c0393b627110 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -7,8 +7,10 @@
 ; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s  | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
 ; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
 ; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
-; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s
-; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-TRUE16 %s
+; RUN:  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG,GFX11SELDAG-FAKE16 %s
+; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-TRUE16 %s
+; RUN:  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL,GFX11GLISEL-FAKE16 %s
 
 define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX7SELDAG-LABEL: sgpr_isnan_f16:
@@ -76,17 +78,42 @@ define amdgpu_kernel void @sgpr_isnan_f16(ptr addrspace(1) %out, half %x) {
 ; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10CHECK-NEXT:    s_endpgm
 ;
-; GFX11CHECK-LABEL: sgpr_isnan_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_clause 0x1
-; GFX11CHECK-NEXT:    s_load_b32 s2, s[4:5], 0x2c
-; GFX11CHECK-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
-; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11CHECK-NEXT:    s_endpgm
+; GFX11SELDAG-TRUE16-LABEL: sgpr_isnan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_clause 0x1
+; GFX11SELDAG-TRUE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11SELDAG-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11SELDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s2, v0.l, 3
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s2
+; GFX11SELDAG-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11SELDAG-TRUE16-NEXT:    s_endpgm
+;
+; GFX11SELDAG-FAKE16-LABEL: sgpr_isnan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_clause 0x1
+; GFX11SELDAG-FAKE16-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11SELDAG-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11SELDAG-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11SELDAG-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11SELDAG-FAKE16-NEXT:    s_endpgm
+;
+; GFX11GLISEL-LABEL: sgpr_isnan_f16:
+; GFX11GLISEL:       ; %bb.0:
+; GFX11GLISEL-NEXT:    s_clause 0x1
+; GFX11GLISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
+; GFX11GLISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11GLISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11GLISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s2, s2, 3
+; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
+; GFX11GLISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11GLISEL-NEXT:    s_endpgm
   %result = call i1 @llvm.is.fpclass.f16(half %x, i32 3)
   %sext = sext i1 %result to i32
   store i32 %sext, ptr addrspace(1) %out, align 4
@@ -208,12 +235,33 @@ define i1 @snan_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: snan_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 1
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: snan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 1
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: snan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: snan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 1
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: snan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 1
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 1)  ; 0x001
   ret i1 %1
 }
@@ -260,12 +308,33 @@ define i1 @qnan_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: qnan_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 2
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: qnan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 2
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: qnan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 2
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: qnan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 2
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: qnan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 2
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 2)  ; 0x002
   ret i1 %1
 }
@@ -312,12 +381,33 @@ define i1 @posinf_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: posinf_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x200
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: posinf_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x200
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: posinf_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x200
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: posinf_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x200
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: posinf_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x200
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 512)  ; 0x200
   ret i1 %1
 }
@@ -362,12 +452,33 @@ define i1 @neginf_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: neginf_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 4
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: neginf_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 4
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: neginf_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 4
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: neginf_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 4
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: neginf_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 4
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 4)  ; 0x004
   ret i1 %1
 }
@@ -426,12 +537,33 @@ define i1 @posnormal_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: posnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x100
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: posnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x100
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: posnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x100
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: posnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x100
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: posnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x100
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 256)  ; 0x100
   ret i1 %1
 }
@@ -488,12 +620,33 @@ define i1 @negnormal_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 8
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 8
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 8
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: negnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 8
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: negnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 8
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 8)  ; 0x008
   ret i1 %1
 }
@@ -543,12 +696,33 @@ define i1 @possubnormal_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: possubnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x80
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: possubnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x80
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: possubnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x80
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: possubnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x80
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: possubnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x80
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 128)  ; 0x080
   ret i1 %1
 }
@@ -604,12 +778,33 @@ define i1 @negsubnormal_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negsubnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 16
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negsubnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 16
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negsubnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 16
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: negsubnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 16
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: negsubnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 16
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 16)  ; 0x010
   ret i1 %1
 }
@@ -652,12 +847,33 @@ define i1 @poszero_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: poszero_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 64
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: poszero_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 64
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: poszero_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 64
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: poszero_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 64
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: poszero_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 64
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 64)  ; 0x040
   ret i1 %1
 }
@@ -702,12 +918,33 @@ define i1 @negzero_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negzero_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 32
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negzero_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 32
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negzero_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 32
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: negzero_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 32
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: negzero_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 32
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 32)  ; 0x020
   ret i1 %1
 }
@@ -754,12 +991,33 @@ define i1 @posfinite_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: posfinite_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1c0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: posfinite_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1c0
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: posfinite_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1c0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: posfinite_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1c0
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: posfinite_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1c0
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 448)  ; 0x1c0
   ret i1 %1
 }
@@ -812,12 +1070,33 @@ define i1 @negfinite_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: negfinite_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 56
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: negfinite_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 56
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: negfinite_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 56
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: negfinite_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 56
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: negfinite_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 56
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 56)  ; 0x038
   ret i1 %1
 }
@@ -864,12 +1143,33 @@ define i1 @isnan_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnan_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 3
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isnan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isnan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3)  ; nan
   ret i1 %1
 }
@@ -918,12 +1218,33 @@ define i1 @not_isnan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isnan_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3fc
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isnan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3fc
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isnan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3fc
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_isnan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3fc
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_isnan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3fc
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = call i1 @llvm.is.fpclass.f16(half %x, i32 1020)  ; ~nan
   ret i1 %class
 }
@@ -1018,25 +1339,45 @@ define <2 x i1> @isnan_v2f16(<2 x half> %x) nounwind {
 ; GFX10GLISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s4
 ; GFX10GLISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11SELDAG-LABEL: isnan_v2f16:
-; GFX11SELDAG:       ; %bb.0:
-; GFX11SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11GLISEL-LABEL: isnan_v2f16:
-; GFX11GLISEL:       ; %bb.0:
-; GFX11GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v1, 3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX11GLISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_v2f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_v2f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isnan_v2f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.h, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, v2
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isnan_v2f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v1, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call <2 x i1> @llvm.is.fpclass.v2f16(<2 x half> %x, i32 3)  ; nan
   ret <2 x i1> %1
 }
@@ -1156,31 +1497,56 @@ define <3 x i1> @isnan_v3f16(<3 x half> %x) nounwind {
 ; GFX10GLISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s4
 ; GFX10GLISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11SELDAG-LABEL: isnan_v3f16:
-; GFX11SELDAG:       ; %bb.0:
-; GFX11SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
-; GFX11SELDAG-NEXT:    v_mov_b32_e32 v1, v3
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11GLISEL-LABEL: isnan_v3f16:
-; GFX11GLISEL:       ; %bb.0:
-; GFX11GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v2, 3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v1, 3
-; GFX11GLISEL-NEXT:    v_mov_b32_e32 v1, v3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX11GLISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_v3f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2.l, v2.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT:    v_mov_b32_e32 v1, v3
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_v3f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v2, v2
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_mov_b32_e32 v1, v3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isnan_v3f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.h, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v1.l, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isnan_v3f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v2, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v1, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_mov_b32_e32 v1, v3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call <3 x i1> @llvm.is.fpclass.v3f16(<3 x half> %x, i32 3)  ; nan
   ret <3 x i1> %1
 }
@@ -1322,35 +1688,65 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind {
 ; GFX10GLISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s4
 ; GFX10GLISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11SELDAG-LABEL: isnan_v4f16:
-; GFX11SELDAG:       ; %bb.0:
-; GFX11SELDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
-; GFX11SELDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11SELDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
-; GFX11SELDAG-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
-; GFX11SELDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11GLISEL-LABEL: isnan_v4f16:
-; GFX11GLISEL:       ; %bb.0:
-; GFX11GLISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
-; GFX11GLISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11GLISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v1, 3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v3, 3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX11GLISEL-NEXT:    v_cmp_class_f16_e64 s0, v4, 3
-; GFX11GLISEL-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
-; GFX11GLISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_v4f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0.l, v0.l
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11SELDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1.l, v1.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4.l, v4.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3.l, v3.l
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_v4f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v0, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11SELDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v1, v1
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v4, v4
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_u_f16_e32 vcc_lo, v3, v3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isnan_v4f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.h, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v1.l, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, v4
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v1.h, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_mov_b32_e32 v1, v5
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isnan_v4f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11GLISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v1, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v3, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v4, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call <4 x i1> @llvm.is.fpclass.v4f16(<4 x half> %x, i32 3)  ; nan
   ret <4 x i1> %1
 }
@@ -1400,12 +1796,33 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnan_f16_strictfp:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnan_f16_strictfp:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 3
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnan_f16_strictfp:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isnan_f16_strictfp:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isnan_f16_strictfp:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 3) strictfp ; nan
   ret i1 %1
 }
@@ -1454,12 +1871,33 @@ define i1 @isinf_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isinf_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isinf_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x204
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isinf_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isinf_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x204
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isinf_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 516)  ; 0x204 = "inf"
   ret i1 %1
 }
@@ -1508,12 +1946,33 @@ define i1 @isfinite_f16(half %x) nounwind {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isfinite_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1f8
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isfinite_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1f8
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isfinite_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1f8
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isfinite_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1f8
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isfinite_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1f8
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %1 = call i1 @llvm.is.fpclass.f16(half %x, i32 504)  ; 0x1f8 = "finite"
   ret i1 %1
 }
@@ -1560,12 +2019,33 @@ define i1 @issubnormal_or_zero_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: issubnormal_or_zero_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0xf0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: issubnormal_or_zero_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0xf0
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: issubnormal_or_zero_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0xf0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: issubnormal_or_zero_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0xf0
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: issubnormal_or_zero_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0xf0
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 240)  ; 0xf0 = "subnormal|zero"
   ret i1 %class
@@ -1619,12 +2099,33 @@ define i1 @not_issubnormal_or_zero_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_issubnormal_or_zero_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x30f
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_or_zero_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x30f
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_or_zero_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x30f
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_issubnormal_or_zero_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x30f
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_issubnormal_or_zero_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x30f
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
     %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 783)  ; ~0xf0 = "~(subnormal|zero)"
   ret i1 %class
@@ -1677,12 +2178,33 @@ define i1 @isnormal_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x108
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x108
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x108
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x108
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x108
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 264)  ; 0x108 = "normal"
   ret i1 %class
 }
@@ -1739,12 +2261,33 @@ define i1 @not_isnormal_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x2f7
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x2f7
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x2f7
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_isnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x2f7
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_isnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x2f7
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 759)  ; ~0x108 = "~normal"
   ret i1 %class
 }
@@ -1812,12 +2355,33 @@ define i1 @not_is_plus_normal_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_is_plus_normal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x2ff
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_is_plus_normal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x2ff
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_is_plus_normal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x2ff
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_is_plus_normal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x2ff
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_is_plus_normal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x2ff
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 767)  ; ~0x100 = ~"+normal"
   ret i1 %class
 }
@@ -1885,12 +2449,33 @@ define i1 @not_is_neg_normal_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_is_neg_normal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3f7
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_is_neg_normal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3f7
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_is_neg_normal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3f7
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_is_neg_normal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3f7
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_is_neg_normal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3f7
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 1015)  ; ~0x008 = ~"-normal"
   ret i1 %class
 }
@@ -1941,12 +2526,33 @@ define i1 @issubnormal_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: issubnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x90
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: issubnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x90
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: issubnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x90
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: issubnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x90
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: issubnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x90
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 144)  ; 0x90 = "subnormal"
   ret i1 %class
 }
@@ -2005,12 +2611,33 @@ define i1 @not_issubnormal_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_issubnormal_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x36f
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_issubnormal_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x36f
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_issubnormal_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x36f
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_issubnormal_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x36f
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_issubnormal_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x36f
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 879)  ; ~0x90 = ~"subnormal"
   ret i1 %class
 }
@@ -2057,12 +2684,33 @@ define i1 @iszero_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x60
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x60
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x60
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: iszero_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x60
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: iszero_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x60
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 96)  ; 0x60 = "zero"
   ret i1 %class
 }
@@ -2122,12 +2770,33 @@ define i1 @not_iszero_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39f
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39f
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39f
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_iszero_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39f
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_iszero_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39f
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 927)  ; ~0x60 = ~"zero"
   ret i1 %class
 }
@@ -2174,12 +2843,33 @@ define i1 @ispositive_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: ispositive_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3c0
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: ispositive_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3c0
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: ispositive_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3c0
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: ispositive_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3c0
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: ispositive_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3c0
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 960)  ; fcPositive
   ret i1 %class
 }
@@ -2242,12 +2932,33 @@ define i1 @not_ispositive_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_ispositive_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 63
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_ispositive_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 63
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_ispositive_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 63
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_ispositive_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 63
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_ispositive_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 63
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 63)  ; ~fcPositive
   ret i1 %class
 }
@@ -2306,12 +3017,33 @@ define i1 @isnegative_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isnegative_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 60
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isnegative_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 60
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isnegative_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 60
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isnegative_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 60
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isnegative_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 60
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 60)  ; fcNegative
   ret i1 %class
 }
@@ -2367,12 +3099,33 @@ define i1 @not_isnegative_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isnegative_f16:
-; GFX11CHECK:       ; %bb.0:
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3c3
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isnegative_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0:
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3c3
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isnegative_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0:
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3c3
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_isnegative_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0:
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x3c3
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_isnegative_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0:
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x3c3
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %class = tail call i1 @llvm.is.fpclass.f16(half %x, i32 963)  ; ~fcNegative
   ret i1 %class
 }
@@ -2424,12 +3177,33 @@ define i1 @iszero_or_nan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_nan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x63
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x63
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 99)  ; 0x60|0x3 = "zero|nan"
   ret i1 %0
@@ -2482,12 +3256,33 @@ define i1 @iszero_or_nan_f_daz(half %x) #0 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_nan_f_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x63
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f_daz:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x63
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f_daz:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 99)  ; 0x60|0x3 = "zero|nan"
   ret i1 %0
@@ -2540,12 +3335,33 @@ define i1 @iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_nan_f_maybe_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x63
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: iszero_or_nan_f_maybe_daz:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x63
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: iszero_or_nan_f_maybe_daz:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x63
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 99)  ; 0x60|0x3 = "zero|nan"
   ret i1 %0
@@ -2607,12 +3423,33 @@ define i1 @not_iszero_or_nan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_nan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39c
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39c
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 924)  ; ~0x60 = "~(zero|nan)"
   ret i1 %0
@@ -2674,12 +3511,33 @@ define i1 @not_iszero_or_nan_f_daz(half %x) #0 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_nan_f_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39c
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f_daz:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39c
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f_daz:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 924)  ; ~(0x60|0x3) = "~(zero|nan)"
   ret i1 %0
@@ -2741,12 +3599,33 @@ define i1 @not_iszero_or_nan_f_maybe_daz(half %x) #1 {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_nan_f_maybe_daz:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39c
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_nan_f_maybe_daz:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_nan_f_maybe_daz:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39c
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_nan_f_maybe_daz:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39c
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 924)  ; ~(0x60|0x3) = "~(zero|nan)"
   ret i1 %0
@@ -2799,12 +3678,33 @@ define i1 @iszero_or_qnan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_qnan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x62
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_qnan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x62
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_qnan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x62
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: iszero_or_qnan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x62
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: iszero_or_qnan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x62
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 98)  ; 0x60|0x2 = "zero|qnan"
   ret i1 %0
@@ -2862,12 +3762,33 @@ define i1 @iszero_or_snan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: iszero_or_snan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x61
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: iszero_or_snan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x61
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: iszero_or_snan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x61
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: iszero_or_snan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x61
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: iszero_or_snan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x61
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 97)  ; 0x60|0x1 = "zero|snan"
   ret i1 %0
@@ -2945,12 +3866,33 @@ define i1 @not_iszero_or_qnan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_qnan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39d
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_qnan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39d
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_qnan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39d
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_qnan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39d
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_qnan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39d
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 925)  ; ~(0x60|0x2) = "~(zero|qnan)"
   ret i1 %0
@@ -3025,12 +3967,33 @@ define i1 @not_iszero_or_snan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_iszero_or_snan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39e
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_iszero_or_snan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39e
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_iszero_or_snan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39e
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_iszero_or_snan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x39e
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_iszero_or_snan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x39e
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 926)  ; ~(0x60|0x1) = "~(zero|snan)"
   ret i1 %0
@@ -3080,12 +4043,33 @@ define i1 @isinf_or_nan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isinf_or_nan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x207
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isinf_or_nan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x207
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isinf_or_nan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x207
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isinf_or_nan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x207
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isinf_or_nan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x207
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 519)  ; 0x204|0x3 = "inf|nan"
   ret i1 %0
@@ -3135,12 +4119,33 @@ define i1 @not_isinf_or_nan_f16(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isinf_or_nan_f16:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1f8
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isinf_or_nan_f16:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1f8
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isinf_or_nan_f16:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1f8
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_isinf_or_nan_f16:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1f8
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_isinf_or_nan_f16:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1f8
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 504)  ; ~(0x204|0x3) = "~(inf|nan)"
   ret i1 %0
@@ -3190,12 +4195,33 @@ define i1 @isfinite_or_nan_f(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: isfinite_or_nan_f:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1fb
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: isfinite_or_nan_f:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1fb
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: isfinite_or_nan_f:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1fb
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: isfinite_or_nan_f:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x1fb
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: isfinite_or_nan_f:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x1fb
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 507)  ; 0x1f8|0x3 = "finite|nan"
   ret i1 %0
@@ -3245,12 +4271,33 @@ define i1 @not_isfinite_or_nan_f(half %x) {
 ; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
 ; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11CHECK-LABEL: not_isfinite_or_nan_f:
-; GFX11CHECK:       ; %bb.0: ; %entry
-; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11CHECK-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
-; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
-; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GFX11SELDAG-TRUE16-LABEL: not_isfinite_or_nan_f:
+; GFX11SELDAG-TRUE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x204
+; GFX11SELDAG-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11SELDAG-FAKE16-LABEL: not_isfinite_or_nan_f:
+; GFX11SELDAG-FAKE16:       ; %bb.0: ; %entry
+; GFX11SELDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11SELDAG-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
+; GFX11SELDAG-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11SELDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-TRUE16-LABEL: not_isfinite_or_nan_f:
+; GFX11GLISEL-TRUE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-TRUE16-NEXT:    v_cmp_class_f16_e64 s0, v0.l, 0x204
+; GFX11GLISEL-TRUE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11GLISEL-FAKE16-LABEL: not_isfinite_or_nan_f:
+; GFX11GLISEL-FAKE16:       ; %bb.0: ; %entry
+; GFX11GLISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11GLISEL-FAKE16-NEXT:    v_cmp_class_f16_e64 s0, v0, 0x204
+; GFX11GLISEL-FAKE16-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11GLISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = tail call i1 @llvm.is.fpclass.f16(half %x, i32 516)  ; ~(0x1f8|0x3) = "~(finite|nan)"
   ret i1 %0
@@ -3266,3 +4313,5 @@ attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" }
 
 ; Maybe daz
 attributes #1 = { "denormal-fp-math"="ieee,dynamic" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11SELDAG: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
index 76ca99059d58d..afede06001736 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll
@@ -4,8 +4,10 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
 
 define half @v_maximum_f16(half %src0, half %src1) {
 ; GFX7-LABEL: v_maximum_f16:
@@ -53,24 +55,43 @@ define half @v_maximum_f16(half %src0, half %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximum_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_maximum_f16:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_maximum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_maximum_f16:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_maximum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call half @llvm.maximum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -110,21 +131,37 @@ define half @v_maximum_f16__nnan(half %src0, half %src1) {
 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_f16__nnan:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximum_f16__nnan:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_f16__nnan:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_f16__nnan:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_maximum_f16__nnan:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_maximum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_maximum_f16__nnan:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_maximum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan half @llvm.maximum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -175,24 +212,43 @@ define half @v_maximum_f16__nsz(half %src0, half %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximum_f16__nsz:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_maximum_f16__nsz:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_maximum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_maximum_f16__nsz:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_maximum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call nsz half @llvm.maximum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -232,21 +288,37 @@ define half @v_maximum_f16__nnan_nsz(half %src0, half %src1) {
 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_f16__nnan_nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximum_f16__nnan_nsz:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_f16__nnan_nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_f16__nnan_nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_nsz:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_maximum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_maximum_f16__nnan_nsz:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_maximum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan nsz half @llvm.maximum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -302,27 +374,49 @@ define half @v_maximum_f16__nnan_src0(half %arg0, half %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_f16__nnan_src0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximum_f16__nnan_src0:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_f16__nnan_src0:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_f16__nnan_src0:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src0:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_maximum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_maximum_f16__nnan_src0:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_maximum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %src0 = fadd nnan half %arg0, 1.0
   %op = call half @llvm.maximum.f16(half %src0, half %src1)
   ret half %op
@@ -379,27 +473,49 @@ define half @v_maximum_f16__nnan_src1(half %src0, half %arg1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_f16__nnan_src1:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_max_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_maximum_f16__nnan_src1:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_maximum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_f16__nnan_src1:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.h, 1.0, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_f16__nnan_src1:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_maximum_f16__nnan_src1:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_add_f16_e32 v1.l, 1.0, v1.l
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_maximum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_maximum_f16__nnan_src1:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_maximum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %src1 = fadd nnan half %arg1, 1.0
   %op = call half @llvm.maximum.f16(half %src0, half %src1)
   ret half %op
@@ -475,18 +591,31 @@ define void @s_maximum_f16(half inreg %src0, half inreg %src1) {
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: s_maximum_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_max_f16_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    ;;#ASMSTART
-; GFX11-NEXT:    ; use v0
-; GFX11-NEXT:    ;;#ASMEND
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: s_maximum_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
+; GFX11-TRUE16-NEXT:    v_max_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v0
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: s_maximum_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e64 v0, s0, s1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v0
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: s_maximum_f16:
 ; GFX12:       ; %bb.0:
@@ -580,21 +709,36 @@ define <2 x half> @v_maximum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v4, v0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v2.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f16:
 ; GFX12:       ; %bb.0:
@@ -739,21 +883,36 @@ define <2 x half> @v_maximum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v2f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v2, v0, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v2f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v4, v0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v2.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v2f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v2f16__nsz:
 ; GFX12:       ; %bb.0:
@@ -929,25 +1088,50 @@ define void @s_maximum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: s_maximum_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_lshr_b32 s2, s1, 16
-; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-NEXT:    ;;#ASMSTART
-; GFX11-NEXT:    ; use v0
-; GFX11-NEXT:    ;;#ASMEND
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: s_maximum_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s0
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, s1
+; GFX11-TRUE16-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX11-TRUE16-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, s2
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.h, s3
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v2, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v1.l, v1.h
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v0
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: s_maximum_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v0, s0, s1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-FAKE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11-FAKE16-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v0
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: s_maximum_v2f16:
 ; GFX12:       ; %bb.0:
@@ -1053,25 +1237,43 @@ define <3 x half> @v_maximum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v3f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v3f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s1
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v3f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f16:
 ; GFX12:       ; %bb.0:
@@ -1245,25 +1447,43 @@ define <3 x half> @v_maximum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v3f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v4, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v3f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v6, v0, v2
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s1
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v3f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v3f16__nsz:
 ; GFX12:       ; %bb.0:
@@ -1459,30 +1679,52 @@ define <4 x half> @v_maximum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v4f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-NEXT:    v_pk_max_f16 v7, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v4f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v8, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v7.l, v6.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v8
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v8.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v3.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v4f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v7, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f16:
 ; GFX12:       ; %bb.0:
@@ -1685,30 +1927,52 @@ define <4 x half> @v_maximum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v4f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v4, v1, v3
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-NEXT:    v_pk_max_f16 v7, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v4f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v8, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v7.l, v6.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v8
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v8.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v3.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v4f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v7, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v4f16__nsz:
 ; GFX12:       ; %bb.0:
@@ -1989,48 +2253,85 @@ define <8 x half> @v_maximum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v3, v3, v10, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v8f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v8, v3, v7
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    v_pk_max_f16 v10, v2, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
-; GFX11-NEXT:    v_pk_max_f16 v14, v1, v5
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
-; GFX11-NEXT:    v_pk_max_f16 v11, v0, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX11-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v8f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3.l, v7.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v8, v3, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.l, v6.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v10, v2, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v8.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v9.l, v7.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v8
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v10.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v11.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v1.l, v5.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v12, v0, v4
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v5
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v8.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v0.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v11.l, v9.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v10
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v12.l, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.l, s4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v5.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v7.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v8f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v8, v3, v7
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v10, v2, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v14, v1, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v11, v0, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v8f16:
 ; GFX12:       ; %bb.0:
@@ -2392,90 +2693,159 @@ define <16 x half> @v_maximum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v4, v4, v13, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_maximum_v16f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_max_f16 v16, v7, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
-; GFX11-NEXT:    v_pk_max_f16 v15, v6, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-NEXT:    v_pk_max_f16 v20, v4, v12
-; GFX11-NEXT:    v_pk_max_f16 v22, v2, v10
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
-; GFX11-NEXT:    v_pk_max_f16 v14, v5, v13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-NEXT:    v_pk_max_f16 v17, v3, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
-; GFX11-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
-; GFX11-NEXT:    v_pk_max_f16 v19, v1, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-NEXT:    v_pk_max_f16 v22, v0, v8
-; GFX11-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
-; GFX11-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_maximum_v16f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v15
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v18, v7, v15
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7.l, v15.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v6.l, v14.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v19, v5, v13
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v17.l, v16.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v18
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v16, v6, v14
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, 0x7e00, v18.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, 0x7e00, v15.l, s0
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, 0x7e00, v16.l, s1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v5.l, v13.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v19
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v15.l, v14.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v16
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v18.l, v17.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v4.l, v12.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v15, v4, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, 0x7e00, v14.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, 0x7e00, v19.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, 0x7e00, v13.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, 0x7e00, v15.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v16.l, v12.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v15
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v11.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v13, v3, v11
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v2.l, v10.l
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v15, v2, v10
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v13.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v14.l, v11.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v13
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v15.l, s1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v16.l, v10.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.l, v9.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v9
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v8
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v17, v0, v8
+; GFX11-TRUE16-NEXT:    v_pk_max_f16 v1, v1, v9
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v13.l, v10.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v0.l, v8.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s5, v16.l, v14.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v17
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v15
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v17.l, s4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v8.l, s5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v9.l, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v10.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v11.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, 0x7e00, v12.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_maximum_v16f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v16, v7, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v15, v6, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v20, v4, v12
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v22, v2, v10
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v14, v5, v13
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v17, v3, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v19, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-FAKE16-NEXT:    v_pk_max_f16 v22, v0, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_maximum_v16f16:
 ; GFX12:       ; %bb.0:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
index c472ee39a41e4..a645a8ab5d2f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll
@@ -5,8 +5,10 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
 
 define half @v_minimum_f16(half %src0, half %src1) {
 ; GFX8-LABEL: v_minimum_f16:
@@ -41,24 +43,43 @@ define half @v_minimum_f16(half %src0, half %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimum_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_minimum_f16:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_minimum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_minimum_f16:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_minimum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call half @llvm.minimum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -88,21 +109,37 @@ define half @v_minimum_f16__nnan(half %src0, half %src1) {
 ; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_f16__nnan:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimum_f16__nnan:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_f16__nnan:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_f16__nnan:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_minimum_f16__nnan:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_minimum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_minimum_f16__nnan:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_minimum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan half @llvm.minimum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -140,24 +177,43 @@ define half @v_minimum_f16__nsz(half %src0, half %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimum_f16__nsz:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_minimum_f16__nsz:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_minimum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_minimum_f16__nsz:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_minimum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call nsz half @llvm.minimum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -187,21 +243,37 @@ define half @v_minimum_f16__nnan_nsz(half %src0, half %src1) {
 ; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_f16__nnan_nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimum_f16__nnan_nsz:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_f16__nnan_nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_f16__nnan_nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_nsz:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_minimum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_minimum_f16__nnan_nsz:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_minimum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call nnan nsz half @llvm.minimum.f16(half %src0, half %src1)
   ret half %op
 }
@@ -243,27 +315,49 @@ define half @v_minimum_f16__nnan_src0(half %arg0, half %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_f16__nnan_src0:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimum_f16__nnan_src0:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_f16__nnan_src0:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_f16__nnan_src0:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src0:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_minimum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_minimum_f16__nnan_src0:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_minimum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %src0 = fadd nnan half %arg0, 1.0
   %op = call half @llvm.minimum.f16(half %src0, half %src1)
   ret half %op
@@ -306,27 +400,49 @@ define half @v_minimum_f16__nnan_src1(half %src0, half %arg1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_f16__nnan_src1:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_min_f16_e32 v2, v0, v1
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX12-LABEL: v_minimum_f16__nnan_src1:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GFX12-NEXT:    s_wait_expcnt 0x0
-; GFX12-NEXT:    s_wait_samplecnt 0x0
-; GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_add_f16_e32 v1, 1.0, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_minimum_f16 v0, v0, v1
-; GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_f16__nnan_src1:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.h, 1.0, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_f16__nnan_src1:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-TRUE16-LABEL: v_minimum_f16__nnan_src1:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    v_add_f16_e32 v1.l, 1.0, v1.l
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_minimum_f16 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX12-FAKE16-LABEL: v_minimum_f16__nnan_src1:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    v_add_f16_e32 v1, 1.0, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_minimum_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %src1 = fadd nnan half %arg1, 1.0
   %op = call half @llvm.minimum.f16(half %src0, half %src1)
   ret half %op
@@ -385,18 +501,31 @@ define void @s_minimum_f16(half inreg %src0, half inreg %src1) {
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: s_minimum_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_min_f16_e64 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    ;;#ASMSTART
-; GFX11-NEXT:    ; use v0
-; GFX11-NEXT:    ;;#ASMEND
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: s_minimum_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, s0, s1
+; GFX11-TRUE16-NEXT:    v_min_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v0
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: s_minimum_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_min_f16_e64 v0, s0, s1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v0
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: s_minimum_f16:
 ; GFX12:       ; %bb.0:
@@ -470,21 +599,36 @@ define <2 x half> @v_minimum_v2f16(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v4, v0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v2.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f16:
 ; GFX12:       ; %bb.0:
@@ -594,21 +738,36 @@ define <2 x half> @v_minimum_v2f16__nsz(<2 x half> %src0, <2 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v2f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v2, v0, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v2f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v4, v0, v1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v2.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v4.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v2f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v2, v0, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v2f16__nsz:
 ; GFX12:       ; %bb.0:
@@ -742,25 +901,50 @@ define void @s_minimum_v2f16(<2 x half> inreg %src0, <2 x half> inreg %src1) {
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: s_minimum_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v0, s0, s1
-; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
-; GFX11-NEXT:    s_lshr_b32 s2, s1, 16
-; GFX11-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-NEXT:    ;;#ASMSTART
-; GFX11-NEXT:    ; use v0
-; GFX11-NEXT:    ;;#ASMEND
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: s_minimum_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s0
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, s1
+; GFX11-TRUE16-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX11-TRUE16-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.l, s2
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v1.h, s3
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v2, s0, s1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v1.l, v1.h
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v0
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: s_minimum_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v0, s0, s1
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s1
+; GFX11-FAKE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11-FAKE16-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e64 vcc_lo, s0, s2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v0
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: s_minimum_v2f16:
 ; GFX12:       ; %bb.0:
@@ -839,25 +1023,43 @@ define <3 x half> @v_minimum_v3f16(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v3f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v3f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s1
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v3f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f16:
 ; GFX12:       ; %bb.0:
@@ -984,25 +1186,43 @@ define <3 x half> @v_minimum_v3f16__nsz(<3 x half> %src0, <3 x half> %src1) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v2, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v3f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v4, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v3f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v6, v0, v2
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s1
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v3f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v3f16__nsz:
 ; GFX12:       ; %bb.0:
@@ -1144,30 +1364,52 @@ define <4 x half> @v_minimum_v4f16(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v4f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-NEXT:    v_pk_min_f16 v7, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v4f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v8, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v7.l, v6.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v8
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v8.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v3.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v4f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v7, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f16:
 ; GFX12:       ; %bb.0:
@@ -1311,30 +1553,52 @@ define <4 x half> @v_minimum_v4f16__nsz(<4 x half> %src0, <4 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v1, v1, v6, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v4f16__nsz:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v4, v1, v3
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-NEXT:    v_pk_min_f16 v7, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v4f16__nsz:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v8, v0, v2
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v7.l, v6.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v8
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v8.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v2.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v3.l, s0
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v4f16__nsz:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v4, v1, v3
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v7, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v8
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v4f16__nsz:
 ; GFX12:       ; %bb.0:
@@ -1528,48 +1792,85 @@ define <8 x half> @v_minimum_v8f16(<8 x half> %src0, <8 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v3, v3, v10, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v8f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v8, v3, v7
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    v_pk_min_f16 v10, v2, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
-; GFX11-NEXT:    v_pk_min_f16 v14, v1, v5
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
-; GFX11-NEXT:    v_pk_min_f16 v11, v0, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
-; GFX11-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
-; GFX11-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v8f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3.l, v7.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v8, v3, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v2.l, v6.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v10, v2, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v8.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v9.l, v7.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v8
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v10.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v11.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v1.l, v5.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v12, v0, v4
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v5
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v8.l, v6.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v0.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v11.l, v9.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v10
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v12.l, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v4.l, s4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v5.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v7.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v8f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v8, v3, v7
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v10, v2, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v2
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v14, v1, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, 0x7e00, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v12, v11
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v11, v0, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v11
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, 0x7e00, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v6, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v13, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v5
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v4, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v7
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v10, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v3, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v8f16:
 ; GFX12:       ; %bb.0:
@@ -1811,90 +2112,159 @@ define <16 x half> @v_minimum_v16f16(<16 x half> %src0, <16 x half> %src1) {
 ; GFX10-NEXT:    v_perm_b32 v4, v4, v13, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_minimum_v16f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_min_f16 v16, v7, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
-; GFX11-NEXT:    v_pk_min_f16 v15, v6, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
-; GFX11-NEXT:    v_pk_min_f16 v20, v4, v12
-; GFX11-NEXT:    v_pk_min_f16 v22, v2, v10
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
-; GFX11-NEXT:    v_pk_min_f16 v14, v5, v13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
-; GFX11-NEXT:    v_pk_min_f16 v17, v3, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
-; GFX11-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
-; GFX11-NEXT:    v_pk_min_f16 v19, v1, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11-NEXT:    v_pk_min_f16 v22, v0, v8
-; GFX11-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
-; GFX11-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
-; GFX11-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
-; GFX11-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_minimum_v16f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v15
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v18, v7, v15
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7.l, v15.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v6.l, v14.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v19, v5, v13
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v17.l, v16.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v18
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v16, v6, v14
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, 0x7e00, v18.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v14
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, 0x7e00, v15.l, s0
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, 0x7e00, v16.l, s1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v5.l, v13.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v19
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v15.l, v14.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v16
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v18.l, v17.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v4.l, v12.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v15, v4, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, 0x7e00, v14.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, 0x7e00, v19.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, 0x7e00, v13.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, 0x7e00, v15.l, s2
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v16.l, v12.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v15
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v3.l, v11.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v13, v3, v11
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v2.l, v10.l
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v15, v2, v10
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, 0x7e00, v13.l, s0
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s0, v14.l, v11.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v13
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, 0x7e00, v15.l, s1
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s1, v16.l, v10.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s2, v1.l, v9.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v9
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v8
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v0
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v17, v0, v8
+; GFX11-TRUE16-NEXT:    v_pk_min_f16 v1, v1, v9
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s3, v13.l, v10.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s4, v0.l, v8.l
+; GFX11-TRUE16-NEXT:    v_cmp_o_f16_e64 s5, v16.l, v14.l
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v17
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v15
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x7e00, v1.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x7e00, v17.l, s4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x7e00, v8.l, s5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x7e00, v9.l, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, 0x7e00, v10.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, 0x7e00, v11.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, 0x7e00, v12.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_minimum_v16f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v16, v7, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v7
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v7, v15
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v15, v6, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v16
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v20, v4, v12
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v22, v2, v10
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, 0x7e00, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v6, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v15
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v14, v5, v13
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v16, v7, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, 0x7e00, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v5, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v15, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, 0x7e00, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v18, v17
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v17, v3, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v20
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v17
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, 0x7e00, v20, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v3
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v3, v11
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v13, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, 0x7e00, v17, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v20, v19
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v19, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v22
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, 0x7e00, v21, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v11, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v17, 0x7e00, v22, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX11-FAKE16-NEXT:    v_pk_min_f16 v22, v0, v8
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v21, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v1, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v22
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x7e00, v19, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v0, v8
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v1, v21, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x7e00, v22, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v24, v23
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, 0x7e00, v25, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v2, v10
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v8, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, 0x7e00, v20, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_o_f16_e32 vcc_lo, v4, v12
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v2, v17, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, 0x7e00, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v4, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX12-LABEL: v_minimum_v16f16:
 ; GFX12:       ; %bb.0:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 8a2c6e2ad97e9..7e8c30161c1c8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -3,7 +3,8 @@
 ; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s
 
 declare half @llvm.minnum.f16(half %a, half %b)
 declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
@@ -113,31 +114,57 @@ define amdgpu_kernel void @minnum_f16_ieee(
 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: minnum_f16_ieee:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_clause 0x1
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s6, s10
-; GFX11-NEXT:    s_mov_b32 s7, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX11-NEXT:    v_max_f16_e32 v1, v1, v1
-; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: minnum_f16_ieee:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_clause 0x1
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.h, v1.l, v1.l
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: minnum_f16_ieee:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v1, v1, v1
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) #0 {
@@ -169,10 +196,20 @@ define amdgpu_ps half @minnum_f16_no_ieee(half %a, half %b) #0 {
 ; GFX9-NEXT:    v_min_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
-; GFX10PLUS-LABEL: minnum_f16_no_ieee:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    v_min_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    ; return to shader part epilog
+; GFX10-LABEL: minnum_f16_no_ieee:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: minnum_f16_no_ieee:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: minnum_f16_no_ieee:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
   %r.val = call half @llvm.minnum.f16(half %a, half %b)
   ret half %r.val
 }
@@ -255,24 +292,43 @@ define amdgpu_kernel void @minnum_f16_imm_a(
 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: minnum_f16_imm_a:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s10, s6
-; GFX11-NEXT:    s_mov_b32 s11, s7
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s2
-; GFX11-NEXT:    s_mov_b32 s9, s3
-; GFX11-NEXT:    s_mov_b32 s4, s0
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_mov_b32 s5, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX11-NEXT:    v_min_f16_e32 v0, 0x4200, v0
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: minnum_f16_imm_a:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, 0x4200, v0.l
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: minnum_f16_imm_a:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, 0x4200, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %b) #0 {
 entry:
@@ -360,24 +416,43 @@ define amdgpu_kernel void @minnum_f16_imm_b(
 ; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: minnum_f16_imm_b:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s10, s6
-; GFX11-NEXT:    s_mov_b32 s11, s7
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s2
-; GFX11-NEXT:    s_mov_b32 s9, s3
-; GFX11-NEXT:    s_mov_b32 s4, s0
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_mov_b32 s5, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
-; GFX11-NEXT:    v_min_f16_e32 v0, 4.0, v0
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: minnum_f16_imm_b:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
+; GFX11-TRUE16-NEXT:    v_min_f16_e32 v0.l, 4.0, v0.l
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: minnum_f16_imm_b:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_max_f16_e32 v0, v0, v0
+; GFX11-FAKE16-NEXT:    v_min_f16_e32 v0, 4.0, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) #0 {
 entry:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
index 3a2bf9d009460..27ec1cfadd9d2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
@@ -4,7 +4,8 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX89,GFX9 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-TRUE16 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-FAKE16 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX12 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX12,GFX12-FAKE16 %s
 
 declare half @llvm.rint.f16(half %a)
 declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)
@@ -84,23 +85,41 @@ define amdgpu_kernel void @rint_f16(
 ; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
 ; GFX11-FAKE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: rint_f16:
-; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    s_mov_b32 s6, -1
-; GFX12-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX12-NEXT:    s_mov_b32 s10, s6
-; GFX12-NEXT:    s_mov_b32 s11, s7
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_mov_b32 s8, s2
-; GFX12-NEXT:    s_mov_b32 s9, s3
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    buffer_load_u16 v0, off, s[8:11], null
-; GFX12-NEXT:    s_mov_b32 s5, s1
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_rndne_f16_e32 v0, v0
-; GFX12-NEXT:    buffer_store_b16 v0, off, s[4:7], null
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: rint_f16:
+; GFX12-TRUE16:       ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-TRUE16-NEXT:    buffer_load_u16 v0, off, s[8:11], null
+; GFX12-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_rndne_f16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: rint_f16:
+; GFX12-FAKE16:       ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-FAKE16-NEXT:    buffer_load_u16 v0, off, s[8:11], null
+; GFX12-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX12-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX12-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) {
 entry:
@@ -229,27 +248,49 @@ define amdgpu_kernel void @rint_v2f16(
 ; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
 ; GFX11-FAKE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: rint_v2f16:
-; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    s_mov_b32 s6, -1
-; GFX12-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX12-NEXT:    s_mov_b32 s10, s6
-; GFX12-NEXT:    s_mov_b32 s11, s7
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_mov_b32 s8, s2
-; GFX12-NEXT:    s_mov_b32 s9, s3
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    buffer_load_b32 v0, off, s[8:11], null
-; GFX12-NEXT:    s_mov_b32 s5, s1
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX12-NEXT:    v_rndne_f16_e32 v0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_rndne_f16_e32 v1, v1
-; GFX12-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX12-NEXT:    buffer_store_b32 v0, off, s[4:7], null
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: rint_v2f16:
+; GFX12-TRUE16:       ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-TRUE16-NEXT:    buffer_load_b32 v1, off, s[8:11], null
+; GFX12-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_rndne_f16_e32 v0.l, v1.l
+; GFX12-TRUE16-NEXT:    v_rndne_f16_e32 v0.h, v1.l
+; GFX12-TRUE16-NEXT:    ; kill: def $vgpr1 killed $vgpr1_lo16 killed $exec
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX12-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: rint_v2f16:
+; GFX12-FAKE16:       ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-FAKE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX12-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-FAKE16-NEXT:    v_rndne_f16_e32 v0, v0
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_rndne_f16_e32 v1, v1
+; GFX12-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX12-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) {
 entry:
@@ -258,3 +299,5 @@ entry:
   store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
index 6927636ad04aa..e16540fec0229 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
@@ -3,8 +3,10 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX8 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX9 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX12 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GFX12,GFX12-FAKE16 %s
 
 define amdgpu_kernel void @sin_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX6-LABEL: sin_f16:
@@ -69,31 +71,57 @@ define amdgpu_kernel void @sin_f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX10-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: sin_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_u16 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sin_f16_e32 v1, v1
-; GFX11-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: sin_f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    global_load_d16_b16 v0, v1, s[2:3]
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_sin_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
+; GFX11-TRUE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: sin_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v0, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_sin_f16_e32 v1, v1
-; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX11-FAKE16-LABEL: sin_f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_sin_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX11-FAKE16-NEXT:    s_endpgm
+;
+; GFX12-TRUE16-LABEL: sin_f16:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v1, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v0.l
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_sin_f16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: sin_f16:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_sin_f16_e32 v1, v1
+; GFX12-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.sin.f16(half %a.val)
   store half %r.val, ptr addrspace(1) %r
@@ -184,42 +212,79 @@ define amdgpu_kernel void @sin_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: sin_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
-; GFX11-NEXT:    v_sin_f16_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sin_f16_e32 v2, v2
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_pack_b32_f16 v1, v1, v2
-; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: sin_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    global_load_b32 v0, v1, s[2:3]
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.h, 0.15915494, v2.l
+; GFX11-TRUE16-NEXT:    v_sin_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_sin_f16_e32 v0.h, v0.h
+; GFX11-TRUE16-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: sin_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
+; GFX11-FAKE16-NEXT:    v_sin_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_sin_f16_e32 v2, v2
+; GFX11-FAKE16-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v1, v1, v2
+; GFX11-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-FAKE16-NEXT:    s_endpgm
+;
+; GFX12-TRUE16-LABEL: sin_v2f16:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_b32 v2, v1, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_mul_f16_e32 v0.l, 0.15915494, v2.l
+; GFX12-TRUE16-NEXT:    v_mul_f16_e32 v0.h, 0.15915494, v2.l
+; GFX12-TRUE16-NEXT:    ; kill: def $vgpr2 killed $vgpr2_lo16 killed $exec
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_sin_f16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT:    v_sin_f16_e32 v0.h, v0.h
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(TRANS32_DEP_1)
+; GFX12-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX12-TRUE16-NEXT:    global_store_b32 v1, v0, s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: sin_v2f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v0, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_b32 v1, v0, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX12-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
-; GFX12-NEXT:    v_sin_f16_e32 v1, v1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
-; GFX12-NEXT:    v_sin_f16_e32 v2, v2
-; GFX12-NEXT:    v_pack_b32_f16 v1, v1, v2
-; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-FAKE16-LABEL: sin_v2f16:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_b32 v1, v0, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX12-FAKE16-NEXT:    v_mul_f16_e32 v1, 0.15915494, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-FAKE16-NEXT:    v_mul_f16_e32 v2, 0.15915494, v2
+; GFX12-FAKE16-NEXT:    v_sin_f16_e32 v1, v1
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(TRANS32_DEP_1)
+; GFX12-FAKE16-NEXT:    v_sin_f16_e32 v2, v2
+; GFX12-FAKE16-NEXT:    v_pack_b32_f16 v1, v1, v2
+; GFX12-FAKE16-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %a.val = load <2 x half>, ptr addrspace(1) %a
   %r.val = call <2 x half> @llvm.sin.v2f16(<2 x half> %a.val)
   store <2 x half> %r.val, ptr addrspace(1) %r
@@ -228,3 +293,6 @@ define amdgpu_kernel void @sin_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
 
 declare half @llvm.sin.f16(half %a)
 declare <2 x half> @llvm.sin.v2f16(<2 x half> %a)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
+; GFX12: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
index 716dd3fbd4c74..2996a4e22a3ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
 
 declare half @llvm.sqrt.f16(half %a)
 declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
@@ -45,23 +46,41 @@ define amdgpu_kernel void @sqrt_f16(
 ; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: sqrt_f16:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s10, s6
-; GFX11-NEXT:    s_mov_b32 s11, s7
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s2
-; GFX11-NEXT:    s_mov_b32 s9, s3
-; GFX11-NEXT:    s_mov_b32 s4, s0
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_mov_b32 s5, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: sqrt_f16:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: sqrt_f16:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) {
 entry:
@@ -125,28 +144,51 @@ define amdgpu_kernel void @sqrt_v2f16(
 ; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: sqrt_v2f16:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s10, s6
-; GFX11-NEXT:    s_mov_b32 s11, s7
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s2
-; GFX11-NEXT:    s_mov_b32 s9, s3
-; GFX11-NEXT:    s_mov_b32 s4, s0
-; GFX11-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_mov_b32 s5, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX11-NEXT:    v_sqrt_f16_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sqrt_f16_e32 v1, v1
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: sqrt_v2f16:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-TRUE16-NEXT:    v_sqrt_f16_e32 v0.l, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_sqrt_f16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX11-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: sqrt_v2f16:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX11-FAKE16-NEXT:    v_sqrt_f16_e32 v0, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_sqrt_f16_e32 v1, v1
+; GFX11-FAKE16-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) {
 entry:
@@ -155,3 +197,5 @@ entry:
   store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
index c1ba985d37453..ae41f4381251d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
@@ -3,7 +3,8 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
 
 declare half @llvm.trunc.f16(half %a)
 declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)
@@ -83,23 +84,41 @@ define amdgpu_kernel void @trunc_f16(
 ; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
 ; GFX11-FAKE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: trunc_f16:
-; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    s_mov_b32 s6, -1
-; GFX12-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX12-NEXT:    s_mov_b32 s10, s6
-; GFX12-NEXT:    s_mov_b32 s11, s7
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_mov_b32 s8, s2
-; GFX12-NEXT:    s_mov_b32 s9, s3
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    buffer_load_u16 v0, off, s[8:11], null
-; GFX12-NEXT:    s_mov_b32 s5, s1
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_trunc_f16_e32 v0, v0
-; GFX12-NEXT:    buffer_store_b16 v0, off, s[4:7], null
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: trunc_f16:
+; GFX12-TRUE16:       ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-TRUE16-NEXT:    buffer_load_u16 v0, off, s[8:11], null
+; GFX12-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_trunc_f16_e32 v0.l, v0.l
+; GFX12-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: trunc_f16:
+; GFX12-FAKE16:       ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-FAKE16-NEXT:    buffer_load_u16 v0, off, s[8:11], null
+; GFX12-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_trunc_f16_e32 v0, v0
+; GFX12-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], null
+; GFX12-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) {
 entry:
@@ -208,27 +227,49 @@ define amdgpu_kernel void @trunc_v2f16(
 ; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
 ; GFX11-FAKE16-NEXT:    s_endpgm
 ;
-; GFX12-LABEL: trunc_v2f16:
-; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    s_mov_b32 s6, -1
-; GFX12-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX12-NEXT:    s_mov_b32 s10, s6
-; GFX12-NEXT:    s_mov_b32 s11, s7
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_mov_b32 s8, s2
-; GFX12-NEXT:    s_mov_b32 s9, s3
-; GFX12-NEXT:    s_mov_b32 s4, s0
-; GFX12-NEXT:    buffer_load_b32 v0, off, s[8:11], null
-; GFX12-NEXT:    s_mov_b32 s5, s1
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX12-NEXT:    v_trunc_f16_e32 v0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_trunc_f16_e32 v1, v1
-; GFX12-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX12-NEXT:    buffer_store_b32 v0, off, s[4:7], null
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: trunc_v2f16:
+; GFX12-TRUE16:       ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-TRUE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-TRUE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-TRUE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-TRUE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-TRUE16-NEXT:    buffer_load_b32 v1, off, s[8:11], null
+; GFX12-TRUE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_trunc_f16_e32 v0.l, v1.l
+; GFX12-TRUE16-NEXT:    v_trunc_f16_e32 v0.h, v1.l
+; GFX12-TRUE16-NEXT:    ; kill: def $vgpr1 killed $vgpr1_lo16 killed $exec
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.l, v0.h
+; GFX12-TRUE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: trunc_v2f16:
+; GFX12-FAKE16:       ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX12-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX12-FAKE16-NEXT:    s_mov_b32 s10, s6
+; GFX12-FAKE16-NEXT:    s_mov_b32 s11, s7
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_mov_b32 s8, s2
+; GFX12-FAKE16-NEXT:    s_mov_b32 s9, s3
+; GFX12-FAKE16-NEXT:    s_mov_b32 s4, s0
+; GFX12-FAKE16-NEXT:    buffer_load_b32 v0, off, s[8:11], null
+; GFX12-FAKE16-NEXT:    s_mov_b32 s5, s1
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-FAKE16-NEXT:    v_trunc_f16_e32 v0, v0
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_trunc_f16_e32 v1, v1
+; GFX12-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX12-FAKE16-NEXT:    buffer_store_b32 v0, off, s[4:7], null
+; GFX12-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a) {
 entry:
@@ -237,3 +278,5 @@ entry:
   store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index b5c05d609b100..f6e9f152dca5e 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -3,7 +3,8 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s
 ; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-FAKE16 %s
 
 define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) {
 ; GCN-NOHSA-SI-LABEL: constant_load_i16:
@@ -75,15 +76,25 @@ define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspac
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
-; GFX12-LABEL: constant_load_i16:
-; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v0, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_u16 v1, v0, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    global_store_b16 v0, v1, s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_load_i16:
+; GFX12-TRUE16:       ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v1, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    global_store_b16 v1, v0, s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_load_i16:
+; GFX12-FAKE16:       ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_u16 v1, v0, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    global_store_b16 v0, v1, s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
 entry:
   %ld = load i16, ptr addrspace(4) %in
   store i16 %ld, ptr addrspace(1) %out
@@ -722,41 +733,83 @@ define amdgpu_kernel void @constant_load_v16i16_align2(ptr addrspace(4) %ptr0) #
 ; EG-NEXT:     MOV * T2.X, literal.x,
 ; EG-NEXT:    0(0.000000e+00), 0(0.000000e+00)
 ;
-; GFX12-LABEL: constant_load_v16i16_align2:
-; GFX12:       ; %bb.0: ; %entry
-; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v8, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_clause 0x7
-; GFX12-NEXT:    global_load_u16 v3, v8, s[0:1] offset:28
-; GFX12-NEXT:    global_load_u16 v2, v8, s[0:1] offset:24
-; GFX12-NEXT:    global_load_u16 v1, v8, s[0:1] offset:20
-; GFX12-NEXT:    global_load_u16 v0, v8, s[0:1] offset:16
-; GFX12-NEXT:    global_load_u16 v7, v8, s[0:1] offset:12
-; GFX12-NEXT:    global_load_u16 v6, v8, s[0:1] offset:8
-; GFX12-NEXT:    global_load_u16 v5, v8, s[0:1] offset:4
-; GFX12-NEXT:    global_load_u16 v4, v8, s[0:1]
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v3, v8, s[0:1] offset:30
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v2, v8, s[0:1] offset:26
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v1, v8, s[0:1] offset:22
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v0, v8, s[0:1] offset:18
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v7, v8, s[0:1] offset:14
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v6, v8, s[0:1] offset:10
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v5, v8, s[0:1] offset:6
-; GFX12-NEXT:    s_wait_loadcnt 0x7
-; GFX12-NEXT:    global_load_d16_hi_b16 v4, v8, s[0:1] offset:2
-; GFX12-NEXT:    s_wait_loadcnt 0x4
-; GFX12-NEXT:    global_store_b128 v[0:1], v[0:3], off
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    global_store_b128 v[0:1], v[4:7], off
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_load_v16i16_align2:
+; GFX12-TRUE16:       ; %bb.0: ; %entry
+; GFX12-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v9, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_clause 0x7
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v9, s[0:1] offset:16
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v0, v9, s[0:1] offset:12
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v1, v9, s[0:1] offset:8
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v1, v9, s[0:1] offset:4
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v4, v9, s[0:1] offset:28
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v4, v9, s[0:1] offset:24
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v5, v9, s[0:1] offset:20
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v8, v9, s[0:1]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x6
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v0.h
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x5
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v1.l
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x4
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v1.h
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x3
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v7.l, v4.l
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x2
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v6.l, v4.h
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v4.l, v0.l
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v8.l
+; GFX12-TRUE16-NEXT:    s_clause 0x7
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v7, v9, s[0:1] offset:30
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v6, v9, s[0:1] offset:26
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v5, v9, s[0:1] offset:22
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v4, v9, s[0:1] offset:18
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v3, v9, s[0:1] offset:14
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v2, v9, s[0:1] offset:10
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v1, v9, s[0:1] offset:6
+; GFX12-TRUE16-NEXT:    global_load_d16_hi_b16 v0, v9, s[0:1] offset:2
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    s_clause 0x1
+; GFX12-TRUE16-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX12-TRUE16-NEXT:    global_store_b128 v[0:1], v[0:3], off
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_load_v16i16_align2:
+; GFX12-FAKE16:       ; %bb.0: ; %entry
+; GFX12-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v8, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_clause 0x7
+; GFX12-FAKE16-NEXT:    global_load_u16 v3, v8, s[0:1] offset:28
+; GFX12-FAKE16-NEXT:    global_load_u16 v2, v8, s[0:1] offset:24
+; GFX12-FAKE16-NEXT:    global_load_u16 v1, v8, s[0:1] offset:20
+; GFX12-FAKE16-NEXT:    global_load_u16 v0, v8, s[0:1] offset:16
+; GFX12-FAKE16-NEXT:    global_load_u16 v7, v8, s[0:1] offset:12
+; GFX12-FAKE16-NEXT:    global_load_u16 v6, v8, s[0:1] offset:8
+; GFX12-FAKE16-NEXT:    global_load_u16 v5, v8, s[0:1] offset:4
+; GFX12-FAKE16-NEXT:    global_load_u16 v4, v8, s[0:1]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v3, v8, s[0:1] offset:30
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v2, v8, s[0:1] offset:26
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v1, v8, s[0:1] offset:22
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v0, v8, s[0:1] offset:18
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v7, v8, s[0:1] offset:14
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v6, v8, s[0:1] offset:10
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v5, v8, s[0:1] offset:6
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x7
+; GFX12-FAKE16-NEXT:    global_load_d16_hi_b16 v4, v8, s[0:1] offset:2
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x4
+; GFX12-FAKE16-NEXT:    global_store_b128 v[0:1], v[0:3], off
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    global_store_b128 v[0:1], v[4:7], off
+; GFX12-FAKE16-NEXT:    s_endpgm
 entry:
   %ld =  load <16 x i16>, ptr addrspace(4) %ptr0, align 2
   store <16 x i16> %ld, ptr addrspace(1) poison, align 32
@@ -5379,16 +5432,27 @@ define amdgpu_kernel void @constant_zextload_i16_to_i64(ptr addrspace(1) %out, p
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
-; GFX12-LABEL: constant_zextload_i16_to_i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v1, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_u16 v0, v1, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_zextload_i16_to_i64:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v1, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX12-TRUE16-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_zextload_i16_to_i64:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_u16 v0, v1, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX12-FAKE16-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %a = load i16, ptr addrspace(4) %in
   %ext = zext i16 %a to i64
   store i64 %ext, ptr addrspace(1) %out
@@ -5467,18 +5531,31 @@ define amdgpu_kernel void @constant_sextload_i16_to_i64(ptr addrspace(1) %out, p
 ; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
 ;
-; GFX12-LABEL: constant_sextload_i16_to_i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v2, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_u16 v0, v2, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
-; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_sextload_i16_to_i64:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v2, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX12-TRUE16-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_sextload_i16_to_i64:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_u16 v0, v2, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX12-FAKE16-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %a = load i16, ptr addrspace(4) %in
   %ext = sext i16 %a to i64
   store i64 %ext, ptr addrspace(1) %out
@@ -5550,16 +5627,27 @@ define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(ptr addrspace(1) %ou
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
-; GFX12-LABEL: constant_zextload_v1i16_to_v1i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v1, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_u16 v0, v1, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX12-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_zextload_v1i16_to_v1i64:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v1, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX12-TRUE16-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_zextload_v1i16_to_v1i64:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_u16 v0, v1, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX12-FAKE16-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %load = load <1 x i16>, ptr addrspace(4) %in
   %ext = zext <1 x i16> %load to <1 x i64>
   store <1 x i64> %ext, ptr addrspace(1) %out
@@ -5633,18 +5721,31 @@ define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(ptr addrspace(1) %ou
 ; EG-NEXT:     ASHR * T0.Y, PV.X, literal.x,
 ; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
 ;
-; GFX12-LABEL: constant_sextload_v1i16_to_v1i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    v_mov_b32_e32 v2, 0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    global_load_u16 v0, v2, s[2:3]
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_bfe_i32 v0, v0, 0, 16
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
-; GFX12-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_sextload_v1i16_to_v1i64:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    global_load_d16_b16 v0, v2, s[2:3]
+; GFX12-TRUE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-TRUE16-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX12-TRUE16-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_sextload_v1i16_to_v1i64:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v2, 0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    global_load_u16 v0, v2, s[2:3]
+; GFX12-FAKE16-NEXT:    s_wait_loadcnt 0x0
+; GFX12-FAKE16-NEXT:    v_bfe_i32 v0, v0, 0, 16
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX12-FAKE16-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %load = load <1 x i16>, ptr addrspace(4) %in
   %ext = sext <1 x i16> %load to <1 x i64>
   store <1 x i64> %ext, ptr addrspace(1) %out
@@ -5724,20 +5825,36 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(ptr addrspace(1) %ou
 ; EG-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), 2(2.802597e-45)
 ;
-; GFX12-LABEL: constant_zextload_v2i16_to_v2i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_load_b32 s2, s[2:3], 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_and_b32 s3, 0xffff, s2
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3
-; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
-; GFX12-NEXT:    s_wait_alu 0xfffe
-; GFX12-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1
-; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_zextload_v2i16_to_v2i64:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_mov_b32 s3, s2
+; GFX12-TRUE16-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2
+; GFX12-TRUE16-NEXT:    s_and_b32 s3, 0xffff, s3
+; GFX12-TRUE16-NEXT:    s_wait_alu 0xfffe
+; GFX12-TRUE16-NEXT:    v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v3, v1
+; GFX12-TRUE16-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_zextload_v2i16_to_v2i64:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_load_b32 s2, s[2:3], 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_and_b32 s3, 0xffff, s2
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX12-FAKE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s3
+; GFX12-FAKE16-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
+; GFX12-FAKE16-NEXT:    s_wait_alu 0xfffe
+; GFX12-FAKE16-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1
+; GFX12-FAKE16-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
+; GFX12-FAKE16-NEXT:    s_endpgm
   %load = load <2 x i16>, ptr addrspace(4) %in
   %ext = zext <2 x i16> %load to <2 x i64>
   store <2 x i64> %ext, ptr addrspace(1) %out
@@ -5947,26 +6064,47 @@ define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(ptr addrspace(1) %ou
 ; EG-NEXT:     LSHR * T8.X, PV.W, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 ;
-; GFX12-LABEL: constant_zextload_v4i16_to_v4i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
-; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    s_and_b32 s4, 0xffff, s2
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
-; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1
-; GFX12-NEXT:    s_pack_hl_b32_b16 s2, s3, 0
-; GFX12-NEXT:    s_and_b32 s3, 0xffff, s3
-; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
-; GFX12-NEXT:    s_wait_alu 0xfffe
-; GFX12-NEXT:    v_mov_b32_e32 v0, s3
-; GFX12-NEXT:    v_mov_b32_e32 v2, s2
-; GFX12-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
-; GFX12-NEXT:    s_endpgm
+; GFX12-TRUE16-LABEL: constant_zextload_v4i16_to_v4i64:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
+; GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-TRUE16-NEXT:    s_mov_b32 s4, s2
+; GFX12-TRUE16-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
+; GFX12-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX12-TRUE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s2
+; GFX12-TRUE16-NEXT:    s_and_b32 s4, 0xffff, s4
+; GFX12-TRUE16-NEXT:    s_pack_hl_b32_b16 s2, s3, 0
+; GFX12-TRUE16-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v3, v1
+; GFX12-TRUE16-NEXT:    s_and_b32 s3, 0xffff, s3
+; GFX12-TRUE16-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
+; GFX12-TRUE16-NEXT:    s_wait_alu 0xfffe
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v0, s3
+; GFX12-TRUE16-NEXT:    v_mov_b32_e32 v2, s2
+; GFX12-TRUE16-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
+; GFX12-TRUE16-NEXT:    s_endpgm
+;
+; GFX12-FAKE16-LABEL: constant_zextload_v4i16_to_v4i64:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
+; GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GFX12-FAKE16-NEXT:    s_and_b32 s4, 0xffff, s2
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-FAKE16-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s4
+; GFX12-FAKE16-NEXT:    s_pack_hl_b32_b16 s2, s2, 0
+; GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX12-FAKE16-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, v1
+; GFX12-FAKE16-NEXT:    s_pack_hl_b32_b16 s2, s3, 0
+; GFX12-FAKE16-NEXT:    s_and_b32 s3, 0xffff, s3
+; GFX12-FAKE16-NEXT:    global_store_b128 v1, v[0:3], s[0:1]
+; GFX12-FAKE16-NEXT:    s_wait_alu 0xfffe
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v0, s3
+; GFX12-FAKE16-NEXT:    v_mov_b32_e32 v2, s2
+; GFX12-FAKE16-NEXT:    global_store_b128 v1, v[0:3], s[0:1] offset:16
+; GFX12-FAKE16-NEXT:    s_endpgm
   %load = load <4 x i16>, ptr addrspace(4) %in
   %ext = zext <4 x i16> %load to <4 x i64>
   store <4 x i64> %ext, ptr addrspace(1) %out

diff  --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
index 7c1da18de70f8..030c332850124 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s
 
 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s
 
 define half @add_select_fabs_fabs_f16(i32 %c, half %x, half %y, half %z) {
 ; CI-LABEL: add_select_fabs_fabs_f16:
@@ -30,14 +32,41 @@ define half @add_select_fabs_fabs_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_add_f16_e64 v0, |v0|, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fabs.y = call half @llvm.fabs.f16(half %y)
@@ -73,15 +102,45 @@ define { half, half } @add_select_multi_use_lhs_fabs_fabs_f16(i32 %c, half %x, h
 ; VI-NEXT:    v_add_f16_e64 v1, |v1|, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e64 v1, |v1|, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v1.l, |v1.l|, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v4.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v1, |v1|, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v1.l, |v1.l|, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v4.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v1, |v1|, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fabs.y = call half @llvm.fabs.f16(half %y)
@@ -119,15 +178,45 @@ define { half, half } @add_select_multi_store_use_lhs_fabs_fabs_f16(i32 %c, half
 ; VI-NEXT:    v_mov_b32_e32 v1, v4
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v1.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v1.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fabs.y = call half @llvm.fabs.f16(half %y)
@@ -165,15 +254,45 @@ define { half, half } @add_select_multi_use_rhs_fabs_fabs_f16(i32 %c, half %x, h
 ; VI-NEXT:    v_add_f16_e64 v1, |v2|, v4
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e64 v1, |v2|, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v1.l, |v2.l|, v4.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v1, |v2|, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v1.l, |v2.l|, v4.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v1, |v2|, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fabs.y = call half @llvm.fabs.f16(half %y)
@@ -209,15 +328,45 @@ define half @add_select_fabs_var_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_var_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %select = select i1 %cmp, half %fabs.x, half %y
@@ -248,15 +397,45 @@ define half @add_select_fabs_negk_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_negk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs = call half @llvm.fabs.f16(half %x)
   %select = select i1 %cmp, half %fabs, half -1.0
@@ -286,15 +465,45 @@ define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
 ; VI-NEXT:    v_add_f16_e64 v0, |v0|, v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_negk_negk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v2, 0xc000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xc000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xc000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xc000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xc000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, half -2.0, half -1.0
   %fabs = call half @llvm.fabs.f16(half %select)
@@ -323,15 +532,45 @@ define half @add_select_posk_posk_f16(i32 %c, half %x) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_posk_posk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v2, 0x4000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0x4000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0x4000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v2, 0x4000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, half 2.0, half 1.0
   %add = fadd half %select, %x
@@ -361,15 +600,45 @@ define half @add_select_negk_fabs_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negk_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs = call half @llvm.fabs.f16(half %x)
   %select = select i1 %cmp, half -1.0, half %fabs
@@ -401,15 +670,45 @@ define half @add_select_negliteralk_fabs_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negliteralk_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xe400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xe400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs = call half @llvm.fabs.f16(half %x)
   %select = select i1 %cmp, half -1024.0, half %fabs
@@ -439,14 +738,41 @@ define half @add_select_fabs_posk_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_add_f16_e64 v0, |v0|, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_posk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs = call half @llvm.fabs.f16(half %x)
   %select = select i1 %cmp, half %fabs, half 1.0
@@ -476,14 +802,41 @@ define half @add_select_posk_fabs_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_add_f16_e64 v0, |v0|, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_posk_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e64 v0.l, |v0.l|, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e64 v0, |v0|, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs = call half @llvm.fabs.f16(half %x)
   %select = select i1 %cmp, half 1.0, half %fabs
@@ -514,14 +867,41 @@ define half @add_select_fneg_fneg_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_fneg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %fneg.y = fneg half %y
@@ -557,15 +937,45 @@ define { half, half } @add_select_multi_use_lhs_fneg_fneg_f16(i32 %c, half %x, h
 ; VI-NEXT:    v_sub_f16_e32 v1, v4, v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_sub_f16_e32 v1, v4, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v4.l, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v1, v4, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v4.l, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v1, v4, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %fneg.y = fneg half %y
@@ -603,15 +1013,45 @@ define { half, half } @add_select_multi_store_use_lhs_fneg_fneg_f16(i32 %c, half
 ; VI-NEXT:    v_mov_b32_e32 v1, v4
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v1.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b16 v1.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %fneg.y = fneg half %y
@@ -649,15 +1089,45 @@ define { half, half } @add_select_multi_use_rhs_fneg_fneg_f16(i32 %c, half %x, h
 ; VI-NEXT:    v_sub_f16_e32 v1, v4, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_sub_f16_e32 v1, v4, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v4.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v1, v4, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v4.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v1, v4, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %fneg.y = fneg half %y
@@ -693,15 +1163,45 @@ define half @add_select_fneg_var_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_var_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %select = select i1 %cmp, half %fneg.x, half %y
@@ -731,14 +1231,41 @@ define half @add_select_fneg_negk_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_negk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %select = select i1 %cmp, half %fneg.x, half -1.0
@@ -769,14 +1296,41 @@ define half @add_select_fneg_inv2pi_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_inv2pi_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xb118, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xb118, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %select = select i1 %cmp, half %fneg.x, half 0xH3118
@@ -807,14 +1361,41 @@ define half @add_select_fneg_neginv2pi_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_neginv2pi_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3118, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3118, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %select = select i1 %cmp, half %fneg.x, half 0xHB118
@@ -843,15 +1424,45 @@ define half @add_select_negk_negk_f16(i32 %c, half %x) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negk_negk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v2, 0xc000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xc000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xc000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xc000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xc000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, half -2.0, half -1.0
   %add = fadd half %select, %x
@@ -881,15 +1492,45 @@ define half @add_select_negliteralk_negliteralk_f16(i32 %c, half %x) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v1
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negliteralk_negliteralk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v2, 0xe800
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v2, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xe800
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xec00, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xe800
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xe800
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xec00, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xe800
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, half -2048.0, half -4096.0
   %add = fadd half %select, %x
@@ -917,15 +1558,45 @@ define half @add_select_fneg_negk_negk_f16(i32 %c, half %x) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v1, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_negk_negk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v2, 0xc000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NEXT:    v_sub_f16_e32 v0, v1, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xc000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v1.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xc000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v1, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v2.l, 0xc000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v1.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v2, 0xc000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v1, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, half -2.0, half -1.0
   %fneg.x = fneg half %select
@@ -955,14 +1626,41 @@ define half @add_select_negk_fneg_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negk_fneg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %select = select i1 %cmp, half -1.0, half %fneg.x
@@ -992,14 +1690,41 @@ define half @add_select_fneg_posk_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_posk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %select = select i1 %cmp, half %fneg.x, half 1.0
@@ -1029,14 +1754,41 @@ define half @add_select_posk_fneg_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_posk_fneg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v2.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v2, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fneg half %x
   %select = select i1 %cmp, half 1.0, half %fneg.x
@@ -1069,16 +1821,49 @@ define half @add_select_negfabs_fabs_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negfabs_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
@@ -1113,16 +1898,49 @@ define half @add_select_fabs_negfabs_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_negfabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    v_or_b32_e32 v2, 0x8000, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b16 v0.h, 0x8000, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x8000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b16 v0.h, 0x8000, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x8000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fabs.y = call half @llvm.fabs.f16(half %y)
@@ -1157,16 +1975,49 @@ define half @add_select_neg_fabs_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_neg_fabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.h, 0x7fff, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fsub half -0.000000e+00, %x
   %fabs.y = call half @llvm.fabs.f16(half %y)
@@ -1200,16 +2051,49 @@ define half @add_select_fabs_neg_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_add_f16_e32 v0, v0, v3
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_neg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v0.h, 0x8000, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b16 v0.h, 0x8000, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.h, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v3.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fneg.y = fsub half -0.000000e+00, %y
@@ -1242,15 +2126,45 @@ define half @add_select_neg_negfabs_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_neg_negfabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fneg.x = fsub half -0.000000e+00, %x
   %fabs.y = call half @llvm.fabs.f16(half %y)
@@ -1284,15 +2198,45 @@ define half @add_select_negfabs_neg_f16(i32 %c, half %x, half %y, half %z) {
 ; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negfabs_neg_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
-; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v3.l, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v0, v3, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
@@ -1325,15 +2269,45 @@ define half @mul_select_negfabs_posk_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_negfabs_posk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
-; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
@@ -1365,15 +2339,45 @@ define half @mul_select_posk_negfabs_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_posk_negfabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
-; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
@@ -1405,15 +2409,45 @@ define half @mul_select_negfabs_negk_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_negfabs_negk_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
-; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
@@ -1445,15 +2479,45 @@ define half @mul_select_negk_negfabs_f16(i32 %c, half %x, half %y) {
 ; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_negk_negfabs_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
-; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v1, 0x8000, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fabs.x = call half @llvm.fabs.f16(half %x)
   %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
@@ -1487,15 +2551,25 @@ define half @select_fneg_posk_src_add_f16(i32 %c, half %x, half %y) {
 ; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_add_f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_add_f16_e32 v1, 4.0, v1
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v1.l, 4.0, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v1, 4.0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CI-NSZ-LABEL: select_fneg_posk_src_add_f16:
 ; CI-NSZ:       ; %bb.0:
@@ -1516,14 +2590,23 @@ define half @select_fneg_posk_src_add_f16(i32 %c, half %x, half %y) {
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_add_f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_sub_f16_e32 v1, -4.0, v1
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, -4.0, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v1, -4.0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %add = fadd half %x, 4.0
   %fneg = fneg half %add
@@ -1552,15 +2635,25 @@ define half @select_fneg_posk_src_sub_f16(i32 %c, half %x) {
 ; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_add_f16_e32 v1, -4.0, v1
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_add_f16_e32 v1.l, -4.0, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_add_f16_e32 v1, -4.0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CI-NSZ-LABEL: select_fneg_posk_src_sub_f16:
 ; CI-NSZ:       ; %bb.0:
@@ -1581,14 +2674,23 @@ define half @select_fneg_posk_src_sub_f16(i32 %c, half %x) {
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_sub_f16_e32 v1, 4.0, v1
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_sub_f16_e32 v0.l, 4.0, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_sub_f16_e32 v1, 4.0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %add = fsub half %x, 4.0
   %fneg = fneg half %add
@@ -1616,14 +2718,41 @@ define half @select_fneg_posk_src_mul_f16(i32 %c, half %x) {
 ; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: select_fneg_posk_src_mul_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mul_f16_e32 v1, -4.0, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_mul_f16_e32 v0.l, -4.0, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mul_f16_e32 v1, -4.0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_mul_f16_e32 v0.l, -4.0, v1.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mul_f16_e32 v1, -4.0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %mul = fmul half %x, 4.0
   %fneg = fneg half %mul
@@ -1654,15 +2783,25 @@ define half @select_fneg_posk_src_fma_f16(i32 %c, half %x, half %z) {
 ; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, 4.0, v1
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v2
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_fmac_f16_e32 v2.l, 4.0, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_fmac_f16_e32 v2, 4.0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CI-NSZ-LABEL: select_fneg_posk_src_fma_f16:
 ; CI-NSZ:       ; %bb.0:
@@ -1685,14 +2824,23 @@ define half @select_fneg_posk_src_fma_f16(i32 %c, half %x, half %z) {
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_fma_f16 v0.l, v1.l, -4.0, -v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fma = call half @llvm.fma.f16(half %x, half 4.0, half %z)
   %fneg = fneg half %fma
@@ -1724,15 +2872,25 @@ define half @select_fneg_posk_src_fmad_f16(i32 %c, half %x, half %z) {
 ; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, 4.0, v1
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v2
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_fmac_f16_e32 v2.l, 4.0, v1.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v2.l
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_fmac_f16_e32 v2, 4.0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CI-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
 ; CI-NSZ:       ; %bb.0:
@@ -1756,14 +2914,23 @@ define half @select_fneg_posk_src_fmad_f16(i32 %c, half %x, half %z) {
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_fma_f16 v0.l, v1.l, -4.0, -v2.l
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq i32 %c, 0
   %fmad = call half @llvm.fmuladd.f16(half %x, half 4.0, half %z)
   %fneg = fneg half %fmad
@@ -1776,3 +2943,7 @@ declare half @llvm.fma.f16(half, half, half) #0
 declare half @llvm.fmuladd.f16(half, half, half) #0
 
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
+; GFX11-NSZ: {{.*}}
+; GFX11-SAFE: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index d2bb971b68030..82d706f073258 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -2,12 +2,14 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s
 
 ; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s
 
 define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
 ; CI-LABEL: add_select_fabs_fabs_v2f16:
@@ -65,23 +67,75 @@ define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -156,24 +210,79 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v2, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v1, v2, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v5
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v3.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v5
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v2, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v5
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v3.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v2, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v5
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v2, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v5
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -243,24 +352,79 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f1
 ; GFX9-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_mov_b32_e32 v1, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v3.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b32_e32 v1, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v1, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v3.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v3.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b32_e32 v1, v2
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v1, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -337,24 +501,79 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v3, v5
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v1, v3, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v3, v5
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v3, v5
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v3, v5
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v3, v5
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -420,21 +639,67 @@ define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_var_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v5.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v5.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y
@@ -492,20 +757,63 @@ define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_negk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xbc00, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xbc00, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xbc00, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xbc00, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half -1.0, half -1.0>
@@ -562,20 +870,67 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xc000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v3.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xc000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v3.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select)
@@ -626,19 +981,61 @@ define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_posk_posk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v3, 0x4000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0x4000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v3.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v3.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0x4000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0x4000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v3.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v3.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0x4000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 1.0, half 1.0>
   %add = fadd <2 x half> %select, %x
@@ -695,20 +1092,63 @@ define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negk_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xbc00, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xbc00, v0.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xbc00, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xbc00, v0.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fabs
@@ -767,20 +1207,63 @@ define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xe400, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xe400, v0.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xe400, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xe400, v0.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> <half -1024.0, half -1024.0>, <2 x half> %fabs
@@ -838,20 +1321,63 @@ define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_posk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3c00, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3c00, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3c00, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3c00, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half 1.0, half 1.0>
@@ -909,20 +1435,63 @@ define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_posk_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3c00, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3c00, v0.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3c00, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x3c00, v0.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fabs
@@ -982,20 +1551,63 @@ define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1066,21 +1678,67 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v6.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v6.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1150,21 +1808,67 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f1
 ; GFX9-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1237,21 +1941,67 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x
 ; GFX9-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v6.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v6.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fneg.y = fneg <2 x half> %y
@@ -1321,21 +2071,67 @@ define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_var_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v5.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v1.l, v5.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y
@@ -1391,19 +2187,59 @@ define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_negk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half -1.0, half -1.0>
@@ -1460,19 +2296,59 @@ define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xb118, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xb118, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xb118, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xb118, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xH3118, half 0xH3118>
@@ -1529,19 +2405,59 @@ define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x,
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3118, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3118, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3118, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3118, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xHB118, half 0xHB118>
@@ -1592,19 +2508,61 @@ define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negk_negk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xc000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v3.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xc000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v3.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
   %add = fadd <2 x half> %select, %x
@@ -1656,19 +2614,61 @@ define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v2
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v3, 0xe800
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xe800
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xec00, v3.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xec00, v3.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xe800
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xe800
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xec00, v3.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xec00, v3.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xe800
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2048.0, half -2048.0>, <2 x half> <half -4096.0, half -4096.0>
   %add = fadd <2 x half> %select, %x
@@ -1718,19 +2718,61 @@ define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
 ; GFX9-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_mov_b32_e32 v3, 0xc000
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xc000
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v3.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_mov_b16_e32 v3.l, 0xc000
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v3.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v3.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_mov_b32_e32 v3, 0xc000
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
   %fneg.x = fneg <2 x half> %select
@@ -1786,19 +2828,59 @@ define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negk_fneg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x3c00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fneg.x
@@ -1854,19 +2936,59 @@ define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fneg_posk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 1.0, half 1.0>
@@ -1922,19 +3044,59 @@ define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
 ; GFX9-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_posk_fneg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0xbc00, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fneg.x
@@ -2002,23 +3164,75 @@ define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -2088,23 +3302,75 @@ define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -2174,23 +3440,75 @@ define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_neg_fabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -2259,23 +3577,75 @@ define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
 ; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_fabs_neg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_pk_add_f16 v0, v0, v4
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v2.h, v1.l, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v2, v4
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v4
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.y = fneg <2 x half> %y
@@ -2338,21 +3708,67 @@ define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fneg.x = fneg <2 x half> %x
   %fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -2416,21 +3832,67 @@ define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v3.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v3.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, v5.l, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -2494,20 +3956,63 @@ define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x4400, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x4400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x4400, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x4400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -2570,20 +4075,63 @@ define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x4400, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x4400, v0.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x4400, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0x4400, v0.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -2646,20 +4194,63 @@ define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xc400, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xc400, v0.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xc400, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xc400, v0.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -2722,20 +4313,63 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
 ; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v3
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xc400, v2.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xc400, v0.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0xc400, v2.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v1.h, 0xc400, v0.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v0, v1, v3
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v3
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
   %fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -2805,20 +4439,34 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
 ; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
 ; CI-NSZ:       ; %bb.0:
@@ -2864,19 +4512,32 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
 ; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %add = fadd <2 x half> %x, <half 4.0, half 4.0>
   %fneg = fneg <2 x half> %add
@@ -2941,20 +4602,34 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
 ; CI-NSZ:       ; %bb.0:
@@ -3000,19 +4675,32 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %add = fsub <2 x half> %x, <half 4.0, half 4.0>
   %fneg = fneg <2 x half> %add
@@ -3065,19 +4753,59 @@ define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
 ; GFX9-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %mul = fmul <2 x half> %x, <half 4.0, half 4.0>
   %fneg = fneg <2 x half> %mul
@@ -3148,20 +4876,34 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
 ; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
 ; VI-NSZ:       ; %bb.0:
@@ -3193,19 +4935,32 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
 ; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
   %fneg = fneg <2 x half> %fma
@@ -3278,20 +5033,34 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
 ; GFX9-SAFE-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-SAFE-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE:       ; %bb.0:
-; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-SAFE-TRUE16:       ; %bb.0:
+; GFX11-SAFE-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-TRUE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SAFE-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-SAFE-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-SAFE-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-SAFE-FAKE16:       ; %bb.0:
+; GFX11-SAFE-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SAFE-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-SAFE-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-SAFE-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SAFE-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
 ; CI-NSZ:       ; %bb.0:
@@ -3344,19 +5113,32 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
 ; GFX9-NSZ-NEXT:    v_perm_b32 v0, v1, v0, s4
 ; GFX9-NSZ-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ:       ; %bb.0:
-; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-NSZ-TRUE16:       ; %bb.0:
+; GFX11-NSZ-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-TRUE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x4000, v2.l, s0
+; GFX11-NSZ-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NSZ-TRUE16-NEXT:    v_cndmask_b16 v0.h, 0x4000, v1.l, vcc_lo
+; GFX11-NSZ-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-NSZ-FAKE16:       ; %bb.0:
+; GFX11-NSZ-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NSZ-FAKE16-NEXT:    v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-NSZ-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-NSZ-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-NSZ-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NSZ-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-NSZ-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %c, zeroinitializer
   %fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
   %fneg = fneg <2 x half> %fmad
@@ -3369,3 +5151,7 @@ declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
 declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
 
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
+; GFX11-NSZ: {{.*}}
+; GFX11-SAFE: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll
index e920fdee51815..5ae6b1d78b70e 100644
--- a/llvm/test/CodeGen/AMDGPU/select.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs  | FileCheck %s --check-prefix=SI
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
-; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-TRUE16
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16
 
 define amdgpu_kernel void @select_f16(
 ; SI-LABEL: select_f16:
@@ -81,42 +82,81 @@ define amdgpu_kernel void @select_f16(
 ; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_f16:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_clause 0x1
-; GFX11-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
-; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s18, s6
-; GFX11-NEXT:    s_mov_b32 s19, s7
-; GFX11-NEXT:    s_mov_b32 s22, s6
-; GFX11-NEXT:    s_mov_b32 s23, s7
-; GFX11-NEXT:    s_mov_b32 s26, s6
-; GFX11-NEXT:    s_mov_b32 s27, s7
-; GFX11-NEXT:    s_mov_b32 s2, s6
-; GFX11-NEXT:    s_mov_b32 s3, s7
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s16, s10
-; GFX11-NEXT:    s_mov_b32 s17, s11
-; GFX11-NEXT:    s_mov_b32 s20, s12
-; GFX11-NEXT:    s_mov_b32 s21, s13
-; GFX11-NEXT:    s_mov_b32 s24, s14
-; GFX11-NEXT:    s_mov_b32 s25, s15
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[16:19], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v2, off, s[24:27], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v3, off, s[0:3], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s4, s8
-; GFX11-NEXT:    s_mov_b32 s5, s9
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_f16:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_clause 0x1
+; GFX11-TRUE16-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s26, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s27, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s12
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s13
+; GFX11-TRUE16-NEXT:    s_mov_b32 s24, s14
+; GFX11-TRUE16-NEXT:    s_mov_b32 s25, s15
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[16:19], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v3, off, s[24:27], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v2, off, s[0:3], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s8
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s9
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v3.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_f16:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s7
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s7
+; GFX11-FAKE16-NEXT:    s_mov_b32 s26, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s27, s7
+; GFX11-FAKE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s12
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s13
+; GFX11-FAKE16-NEXT:    s_mov_b32 s24, s14
+; GFX11-FAKE16-NEXT:    s_mov_b32 s25, s15
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[16:19], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v2, off, s[24:27], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v3, off, s[0:3], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s8
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s9
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
 
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
@@ -201,36 +241,69 @@ define amdgpu_kernel void @select_f16_imm_a(
 ; VI-NEXT:    buffer_store_short v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_f16_imm_a:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_f16_imm_a:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v2, off, s[16:19], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0.5, v0.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v2.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_f16_imm_a:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %b,
     ptr addrspace(1) %c,
@@ -312,36 +385,69 @@ define amdgpu_kernel void @select_f16_imm_b(
 ; VI-NEXT:    buffer_store_short v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_f16_imm_b:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_f16_imm_b:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v2, off, s[16:19], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0.5, v0.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v2.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v1.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_f16_imm_b:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %c,
@@ -424,36 +530,67 @@ define amdgpu_kernel void @select_f16_imm_c(
 ; VI-NEXT:    buffer_store_short v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_f16_imm_c:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_f16_imm_c:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v2, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v2.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3800, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_f16_imm_c:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b,
@@ -536,36 +673,67 @@ define amdgpu_kernel void @select_f16_imm_d(
 ; VI-NEXT:    buffer_store_short v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_f16_imm_d:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_f16_imm_d:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v2, off, s[12:15], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v2.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3800, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_f16_imm_d:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[16:19], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v2, off, s[20:23], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v0, v1
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b,
@@ -679,50 +847,97 @@ define amdgpu_kernel void @select_v2f16(
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_v2f16:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_clause 0x1
-; GFX11-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
-; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x44
-; GFX11-NEXT:    s_mov_b32 s2, -1
-; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s6, s2
-; GFX11-NEXT:    s_mov_b32 s7, s3
-; GFX11-NEXT:    s_mov_b32 s22, s2
-; GFX11-NEXT:    s_mov_b32 s23, s3
-; GFX11-NEXT:    s_mov_b32 s18, s2
-; GFX11-NEXT:    s_mov_b32 s19, s3
-; GFX11-NEXT:    s_mov_b32 s26, s2
-; GFX11-NEXT:    s_mov_b32 s27, s3
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s20, s12
-; GFX11-NEXT:    s_mov_b32 s21, s13
-; GFX11-NEXT:    s_mov_b32 s16, s10
-; GFX11-NEXT:    s_mov_b32 s17, s11
-; GFX11-NEXT:    s_mov_b32 s24, s14
-; GFX11-NEXT:    s_mov_b32 s25, s15
-; GFX11-NEXT:    buffer_load_b32 v0, off, s[4:7], 0
-; GFX11-NEXT:    buffer_load_b32 v1, off, s[20:23], 0
-; GFX11-NEXT:    buffer_load_b32 v2, off, s[16:19], 0
-; GFX11-NEXT:    buffer_load_b32 v3, off, s[24:27], 0
-; GFX11-NEXT:    s_mov_b32 s0, s8
-; GFX11-NEXT:    s_mov_b32 s1, s9
-; GFX11-NEXT:    s_waitcnt vmcnt(3)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v2, v1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v6, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_dual_cndmask_b32 v1, v4, v7 :: v_dual_and_b32 v0, 0xffff, v0
-; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_v2f16:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_clause 0x1
+; GFX11-TRUE16-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x44
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s26, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s27, s3
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s12
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s13
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s11
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v0, off, s[20:23], 0
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v1, off, s[16:19], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s24, s14
+; GFX11-TRUE16-NEXT:    s_mov_b32 s25, s15
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v2, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v3, off, s[24:27], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s1, s9
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(3)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e64 s0, v5.l, v4.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v3.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v1.l, v6.l, s0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, s8
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_v2f16:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x44
+; GFX11-FAKE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s26, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s27, s3
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s12
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s13
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s24, s14
+; GFX11-FAKE16-NEXT:    s_mov_b32 s25, s15
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v0, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v1, off, s[20:23], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v2, off, s[16:19], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v3, off, s[24:27], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s0, s8
+; GFX11-FAKE16-NEXT:    s_mov_b32 s1, s9
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(3)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v2, v1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v6, v5
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_dual_cndmask_b32 v1, v4, v7 :: v_dual_and_b32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b,
@@ -825,45 +1040,85 @@ define amdgpu_kernel void @select_v2f16_imm_a(
 ; VI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_v2f16_imm_a:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_b32 v0, off, s[12:15], 0
-; GFX11-NEXT:    buffer_load_b32 v1, off, s[16:19], 0
-; GFX11-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0x3900, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_v2f16_imm_a:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[4:11], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s3
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s8
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v0, off, s[12:15], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s9
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s11
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v1, off, s[16:19], 0
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s1, s5
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0.5, v0.l
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e64 s0, 0x3900, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v5.l, v4.l, s0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, s4
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_v2f16_imm_a:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v0, off, s[12:15], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v1, off, s[16:19], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, 0x3900, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %b,
     ptr addrspace(1) %c,
@@ -964,45 +1219,85 @@ define amdgpu_kernel void @select_v2f16_imm_b(
 ; VI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_v2f16_imm_b:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_b32 v0, off, s[12:15], 0
-; GFX11-NEXT:    buffer_load_b32 v1, off, s[16:19], 0
-; GFX11-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0.5, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0x3900, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_v2f16_imm_b:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[4:11], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s3
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s8
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v0, off, s[12:15], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s9
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s11
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v1, off, s[16:19], 0
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s1, s5
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0.5, v0.l
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-TRUE16-NEXT:    v_cmp_gt_f16_e64 s0, 0x3900, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v5.l, v4.l, s0
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, s4
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_v2f16_imm_b:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v0, off, s[12:15], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v1, off, s[16:19], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0.5, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_gt_f16_e32 vcc_lo, 0x3900, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %c,
@@ -1105,45 +1400,85 @@ define amdgpu_kernel void @select_v2f16_imm_c(
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_v2f16_imm_c:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_b32 v0, off, s[16:19], 0
-; GFX11-NEXT:    buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3900, v5, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_v2f16_imm_c:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[4:11], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s3
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s8
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s9
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s7
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v0, off, s[16:19], 0
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s1, s5
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_nlt_f16_e64 s0, v4.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3800, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3900, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, s4
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_v2f16_imm_c:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v0, off, s[16:19], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v4, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3900, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b,
@@ -1246,45 +1581,85 @@ define amdgpu_kernel void @select_v2f16_imm_d(
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: select_v2f16_imm_d:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s18, s10
-; GFX11-NEXT:    s_mov_b32 s19, s11
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s22, s10
-; GFX11-NEXT:    s_mov_b32 s23, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s16, s4
-; GFX11-NEXT:    s_mov_b32 s17, s5
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    s_mov_b32 s20, s6
-; GFX11-NEXT:    s_mov_b32 s21, s7
-; GFX11-NEXT:    buffer_load_b32 v0, off, s[16:19], 0
-; GFX11-NEXT:    buffer_load_b32 v1, off, s[12:15], 0
-; GFX11-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v1, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, 0x3900, v5, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: select_v2f16_imm_d:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_load_b256 s[4:11], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s3
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s3
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s8
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s9
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s7
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v0, off, s[16:19], 0
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s1, s5
+; GFX11-TRUE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v1.l, v0.l
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_cmp_lt_f16_e64 s0, v4.l, v3.l
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, 0x3800, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, 0x3900, v1.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, s4
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: select_v2f16_imm_d:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s4
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s5
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s7
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v0, off, s[16:19], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v1, off, s[12:15], 0
+; GFX11-FAKE16-NEXT:    buffer_load_b32 v2, off, s[20:23], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v1, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, 0x3800, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_lt_f16_e32 vcc_lo, v4, v3
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, 0x3900, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b32 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b,
@@ -1400,26 +1775,45 @@ define <4 x half> @v_vselect_v4f16(<4 x half> %a, <4 x half> %b, <4 x i32> %cond
 ; VI-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v4f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, v9, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, v11, v10, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v6
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v1, v7, v1, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_vselect_v4f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v0
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s1, 0, v4
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s2, 0, v6
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v7.l, v5.l, s0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v9.l, v8.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v2.l, v0.l, s1
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v3.l, v1.l, s2
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_vselect_v4f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v9, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v5
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v11, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v6
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v7, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <4 x i32> %cond, zeroinitializer
   %select = select <4 x i1> %cmp, <4 x half> %a, <4 x half> %b
   ret <4 x half> %select
@@ -1593,41 +1987,70 @@ define <8 x half> @v_vselect_v8f16(<8 x half> %a, <8 x half> %b, <8 x i32> %cond
 ; VI-NEXT:    v_or_b32_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v8f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v16, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v20, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v21, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, v17, v16, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v6
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v13
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, v17, v16, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v11
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, v19, v18, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, v21, v20, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v12
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v2, v13, v2, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v10
-; GFX11-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v14
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v1, v11, v1, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v3, v15, v3, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_vselect_v8f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v10
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s1, 0, v12
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s2, 0, v9
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s3, 0, v11
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s4, 0, v13
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s5, 0, v15
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v12, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v5
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v0
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v4
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s6, 0, v14
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v9.l, v8.l, s5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v11.l, v10.l, s4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v13.l, v12.l, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v16.l, v15.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v4.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v5.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v6.l, v2.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v7.l, v3.l, s6
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_vselect_v8f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v7
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v15
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v6
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v13
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v17, v16, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v11
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v19, v18, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v9
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v21, v20, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v6, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v8
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v13, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v10
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v9, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v14
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v11, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v15, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <8 x i32> %cond, zeroinitializer
   %select = select <8 x i1> %cmp, <8 x half> %a, <8 x half> %b
   ret <8 x half> %select
@@ -1971,72 +2394,128 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32>
 ; VI-NEXT:    v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v16f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v30
-; GFX11-NEXT:    v_lshrrev_b32_e32 v32, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v34, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v36, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v38, 16, v4
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, v15, v7, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v28
-; GFX11-NEXT:    v_lshrrev_b32_e32 v48, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v50, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v52, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v35, 16, v14
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, v14, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
-; GFX11-NEXT:    v_lshrrev_b32_e32 v54, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v37, 16, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v55, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v39, 16, v12
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, v13, v5, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v24
-; GFX11-NEXT:    v_lshrrev_b32_e32 v53, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v51, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v49, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v33, 16, v15
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v22
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v11, v3, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v20
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v29
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, v35, v34, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v27
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, v37, v36, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v6, v8, v6, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v5, v9, v5, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, v39, v38, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v19
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, v53, v52, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v17
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v4, v10, v4, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v1, v11, v1, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v12, v55, v54, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v21
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, v51, v50, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v23
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, v49, v48, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v2, v13, v2, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v3, v14, v3, 0x5040100
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v31
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, v33, v32, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v12, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v7, v11, v7, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_vselect_v16f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s3, 0, v24
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v18
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s1, 0, v20
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s2, 0, v22
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s4, 0, v26
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s5, 0, v28
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s6, 0, v30
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s7, 0, v17
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s8, 0, v19
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s9, 0, v21
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s10, 0, v23
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s11, 0, v25
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s12, 0, v27
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s13, 0, v29
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v15
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v14
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v5
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v13
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v22, 16, v4
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v11
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v26, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v27, 16, v10
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v28, 16, v1
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v29, 16, v9
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v30, 16, v0
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v8
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, v12.l, v4.l, s3
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, v19.l, v18.l, s13
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, v21.l, v20.l, s12
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, v23.l, v22.l, s11
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v25.l, v24.l, s10
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v27.l, v26.l, s9
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v29.l, v28.l, s8
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v32.l, v30.l, s7
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, v15.l, v7.l, s6
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, v14.l, v6.l, s5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, v13.l, v5.l, s4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v8.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v9.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v10.l, v2.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v11.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s3, 0, v31
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, v17.l, v16.l, s3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_vselect_v16f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v30
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v36, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v38, 16, v4
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v15, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v28
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v48, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v50, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v52, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v35, 16, v14
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v14, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v26
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v54, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v37, 16, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v55, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v39, 16, v12
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v13, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v24
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v53, 16, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v51, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v49, 16, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v15
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v12, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v22
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v11, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v20
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v18
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v16
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v29
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v35, v34, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v27
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v37, v36, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v25
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v8, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v9, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v39, v38, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v19
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v53, v52, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v17
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v10, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v11, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v55, v54, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v21
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v51, v50, vcc_lo
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v23
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, v49, v48, vcc_lo
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v13, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v14, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v31
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v33, v32, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v12, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v11, v7, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <16 x i32> %cond, zeroinitializer
   %select = select <16 x i1> %cmp, <16 x half> %a, <16 x half> %b
   ret <16 x half> %select
@@ -2903,196 +3382,364 @@ define <32 x half> @v_vselect_v32f16(<32 x half> %a, <32 x half> %b, <32 x i32>
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v32f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_clause 0x1f
-; GFX11-NEXT:    scratch_load_b32 v31, off, s32 offset:120
-; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:112
-; GFX11-NEXT:    scratch_load_b32 v33, off, s32
-; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:104
-; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:96
-; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:88
-; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:80
-; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:72
-; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:64
-; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:56
-; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:48
-; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:40
-; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:32
-; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:24
-; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:16
-; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:8
-; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:124
-; GFX11-NEXT:    scratch_load_b32 v64, off, s32 offset:116
-; GFX11-NEXT:    scratch_load_b32 v65, off, s32 offset:108
-; GFX11-NEXT:    scratch_load_b32 v66, off, s32 offset:100
-; GFX11-NEXT:    scratch_load_b32 v67, off, s32 offset:92
-; GFX11-NEXT:    scratch_load_b32 v68, off, s32 offset:84
-; GFX11-NEXT:    scratch_load_b32 v69, off, s32 offset:76
-; GFX11-NEXT:    scratch_load_b32 v70, off, s32 offset:68
-; GFX11-NEXT:    scratch_load_b32 v71, off, s32 offset:60
-; GFX11-NEXT:    scratch_load_b32 v80, off, s32 offset:52
-; GFX11-NEXT:    scratch_load_b32 v81, off, s32 offset:44
-; GFX11-NEXT:    scratch_load_b32 v82, off, s32 offset:36
-; GFX11-NEXT:    scratch_load_b32 v83, off, s32 offset:28
-; GFX11-NEXT:    scratch_load_b32 v84, off, s32 offset:12
-; GFX11-NEXT:    scratch_load_b32 v85, off, s32 offset:4
-; GFX11-NEXT:    scratch_load_b32 v86, off, s32 offset:20
-; GFX11-NEXT:    scratch_load_b32 v87, off, s32 offset:128
-; GFX11-NEXT:    v_lshrrev_b32_e32 v97, 16, v14
-; GFX11-NEXT:    v_lshrrev_b32_e32 v98, 16, v30
-; GFX11-NEXT:    v_lshrrev_b32_e32 v99, 16, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v100, 16, v29
-; GFX11-NEXT:    v_lshrrev_b32_e32 v101, 16, v12
-; GFX11-NEXT:    v_lshrrev_b32_e32 v102, 16, v28
-; GFX11-NEXT:    v_lshrrev_b32_e32 v103, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v112, 16, v27
-; GFX11-NEXT:    v_lshrrev_b32_e32 v113, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v114, 16, v26
-; GFX11-NEXT:    v_lshrrev_b32_e32 v115, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v116, 16, v25
-; GFX11-NEXT:    v_lshrrev_b32_e32 v117, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v118, 16, v24
-; GFX11-NEXT:    v_lshrrev_b32_e32 v119, 16, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v128, 16, v23
-; GFX11-NEXT:    v_lshrrev_b32_e32 v129, 16, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v130, 16, v22
-; GFX11-NEXT:    v_lshrrev_b32_e32 v131, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v132, 16, v21
-; GFX11-NEXT:    v_lshrrev_b32_e32 v133, 16, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v134, 16, v20
-; GFX11-NEXT:    v_lshrrev_b32_e32 v135, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v144, 16, v19
-; GFX11-NEXT:    v_lshrrev_b32_e32 v145, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v146, 16, v18
-; GFX11-NEXT:    v_lshrrev_b32_e32 v147, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v96, 16, v15
-; GFX11-NEXT:    s_waitcnt vmcnt(32)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v31
-; GFX11-NEXT:    v_lshrrev_b32_e32 v31, 16, v17
-; GFX11-NEXT:    v_cndmask_b32_e32 v97, v98, v97, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(31)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v32
-; GFX11-NEXT:    v_lshrrev_b32_e32 v98, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v32, 16, v16
-; GFX11-NEXT:    v_cndmask_b32_e32 v99, v100, v99, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(29)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v34
-; GFX11-NEXT:    v_lshrrev_b32_e32 v100, 16, v33
-; GFX11-NEXT:    v_cndmask_b32_e32 v34, v102, v101, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(28)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v35
-; GFX11-NEXT:    v_cndmask_b32_e32 v35, v112, v103, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(27)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v36
-; GFX11-NEXT:    v_cndmask_b32_e32 v36, v114, v113, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(26)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v37
-; GFX11-NEXT:    v_cndmask_b32_e32 v37, v116, v115, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(25)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v38
-; GFX11-NEXT:    v_cndmask_b32_e32 v38, v118, v117, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(24)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v39
-; GFX11-NEXT:    v_cndmask_b32_e32 v39, v128, v119, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(23)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v48
-; GFX11-NEXT:    v_cndmask_b32_e32 v48, v130, v129, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(22)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v49
-; GFX11-NEXT:    v_cndmask_b32_e32 v49, v132, v131, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(21)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v50
-; GFX11-NEXT:    v_cndmask_b32_e32 v50, v134, v133, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(20)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v51
-; GFX11-NEXT:    v_cndmask_b32_e32 v51, v144, v135, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(19)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v52
-; GFX11-NEXT:    v_cndmask_b32_e32 v52, v146, v145, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(18)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v53
-; GFX11-NEXT:    v_cndmask_b32_e32 v31, v31, v147, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(17)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v54
-; GFX11-NEXT:    v_cndmask_b32_e32 v32, v32, v98, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(16)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v55
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, v33, v15, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(15)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v64
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, v30, v14, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(14)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v65
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v14, v97, v14, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, v29, v13, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(13)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v66
-; GFX11-NEXT:    v_cndmask_b32_e32 v12, v28, v12, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(12)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v67
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v12, v34, v12, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, v27, v11, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(11)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v68
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, v26, v10, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(10)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v69
-; GFX11-NEXT:    v_perm_b32 v13, v99, v13, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v10, v36, v10, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, v25, v9, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(9)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v70
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, v24, v8, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(8)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v71
-; GFX11-NEXT:    v_perm_b32 v11, v35, v11, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v8, v38, v8, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, v23, v7, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(7)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v80
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, v22, v6, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(6)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v81
-; GFX11-NEXT:    v_perm_b32 v9, v37, v9, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v6, v48, v6, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, v21, v5, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(5)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v82
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, v20, v4, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(4)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v83
-; GFX11-NEXT:    v_perm_b32 v7, v39, v7, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v4, v50, v4, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v19, v3, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(3)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v84
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v17, v1, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v85
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(1)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v86
-; GFX11-NEXT:    v_perm_b32 v5, v49, v5, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v0, v32, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v18, v2, vcc_lo
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v87
-; GFX11-NEXT:    v_perm_b32 v3, v51, v3, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v2, v52, v2, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v16, v100, v96, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v1, v31, v1, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v15, v16, v15, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_vselect_v32f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_clause 0x1f
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v31, off, s32 offset:4
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v32, off, s32 offset:12
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v33, off, s32 offset:20
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v34, off, s32 offset:28
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v35, off, s32 offset:36
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v36, off, s32 offset:44
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v37, off, s32 offset:52
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v38, off, s32 offset:60
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v39, off, s32 offset:68
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v48, off, s32 offset:76
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v49, off, s32 offset:84
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v50, off, s32 offset:92
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v51, off, s32 offset:100
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v52, off, s32 offset:108
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v53, off, s32 offset:116
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v54, off, s32 offset:124
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v55, off, s32 offset:8
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v64, off, s32 offset:16
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v65, off, s32 offset:24
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v66, off, s32 offset:32
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v67, off, s32 offset:40
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v68, off, s32 offset:48
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v69, off, s32 offset:56
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v70, off, s32 offset:64
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v71, off, s32 offset:72
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v80, off, s32 offset:80
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v81, off, s32 offset:88
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v82, off, s32 offset:128
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v83, off, s32
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v84, off, s32 offset:120
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v85, off, s32 offset:112
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v86, off, s32 offset:104
+; GFX11-TRUE16-NEXT:    scratch_load_b32 v87, off, s32 offset:96
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v96, 16, v15
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v97, 16, v14
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v98, 16, v30
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v99, 16, v13
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v100, 16, v29
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v101, 16, v12
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v102, 16, v28
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v103, 16, v11
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v112, 16, v27
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v113, 16, v10
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v114, 16, v26
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v115, 16, v9
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v116, 16, v25
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v117, 16, v8
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v118, 16, v24
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v119, 16, v7
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v128, 16, v23
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v129, 16, v6
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v130, 16, v22
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v131, 16, v5
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v132, 16, v21
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v133, 16, v4
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v134, 16, v20
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v135, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v144, 16, v19
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v145, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v146, 16, v18
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v147, 16, v1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(32)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v31
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v31, 16, v17
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(31)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s0, 0, v32
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(30)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s1, 0, v33
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v16
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(29)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s2, 0, v34
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(28)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s3, 0, v35
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(27)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s4, 0, v36
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(26)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s5, 0, v37
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(25)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s6, 0, v38
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(24)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s7, 0, v39
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(23)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s8, 0, v48
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(22)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s9, 0, v49
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(21)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s10, 0, v50
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(20)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s11, 0, v51
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(19)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s12, 0, v52
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(18)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s13, 0, v53
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(17)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s14, 0, v54
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(16)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s15, 0, v55
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(15)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s16, 0, v64
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(14)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s17, 0, v65
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(13)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s18, 0, v66
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(12)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s19, 0, v67
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(11)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s20, 0, v68
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(10)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s21, 0, v69
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(9)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s22, 0, v70
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(8)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s23, 0, v71
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(7)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s24, 0, v80
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(6)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s25, 0, v81
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(5)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s26, 0, v82
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(4)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v83
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(3)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s27, 0, v84
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s28, 0, v85
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s29, 0, v86
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_cmp_eq_u32_e64 s40, 0, v87
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v15.h, v34.l, v96.l, s26
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v14.h, v98.l, v97.l, s27
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v13.h, v100.l, v99.l, s28
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v12.h, v102.l, v101.l, s29
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v11.h, v112.l, v103.l, s40
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.h, v114.l, v113.l, s25
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.h, v116.l, v115.l, s24
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.h, v118.l, v117.l, s23
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.h, v128.l, v119.l, s22
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.h, v130.l, v129.l, s21
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.h, v132.l, v131.l, s20
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.h, v134.l, v133.l, s19
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.h, v144.l, v135.l, s18
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.h, v146.l, v145.l, s17
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.h, v31.l, v147.l, s16
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.h, v33.l, v32.l, s15
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v15.l, v83.l, v15.l, s14
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v14.l, v30.l, v14.l, s13
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v13.l, v29.l, v13.l, s12
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v12.l, v28.l, v12.l, s11
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v11.l, v27.l, v11.l, s10
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v10.l, v26.l, v10.l, s9
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v9.l, v25.l, v9.l, s8
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v8.l, v24.l, v8.l, s7
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v7.l, v23.l, v7.l, s6
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v6.l, v22.l, v6.l, s5
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v5.l, v21.l, v5.l, s4
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v0.l, v16.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v1.l, v17.l, v1.l, s0
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v2.l, v18.l, v2.l, s1
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v3.l, v19.l, v3.l, s2
+; GFX11-TRUE16-NEXT:    v_cndmask_b16 v4.l, v20.l, v4.l, s3
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_vselect_v32f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_clause 0x1f
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v31, off, s32 offset:120
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v32, off, s32 offset:112
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v33, off, s32
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v34, off, s32 offset:104
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v35, off, s32 offset:96
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v36, off, s32 offset:88
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v37, off, s32 offset:80
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v38, off, s32 offset:72
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v39, off, s32 offset:64
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v48, off, s32 offset:56
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v49, off, s32 offset:48
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v50, off, s32 offset:40
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v51, off, s32 offset:32
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v52, off, s32 offset:24
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v53, off, s32 offset:16
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v54, off, s32 offset:8
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v55, off, s32 offset:124
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v64, off, s32 offset:116
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v65, off, s32 offset:108
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v66, off, s32 offset:100
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v67, off, s32 offset:92
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v68, off, s32 offset:84
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v69, off, s32 offset:76
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v70, off, s32 offset:68
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v71, off, s32 offset:60
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v80, off, s32 offset:52
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v81, off, s32 offset:44
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v82, off, s32 offset:36
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v83, off, s32 offset:28
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v84, off, s32 offset:12
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v85, off, s32 offset:4
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v86, off, s32 offset:20
+; GFX11-FAKE16-NEXT:    scratch_load_b32 v87, off, s32 offset:128
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v97, 16, v14
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v98, 16, v30
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v99, 16, v13
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v100, 16, v29
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v101, 16, v12
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v102, 16, v28
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v103, 16, v11
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v112, 16, v27
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v113, 16, v10
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v114, 16, v26
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v115, 16, v9
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v116, 16, v25
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v117, 16, v8
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v118, 16, v24
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v119, 16, v7
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v128, 16, v23
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v129, 16, v6
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v130, 16, v22
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v131, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v132, 16, v21
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v133, 16, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v134, 16, v20
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v135, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v144, 16, v19
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v145, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v146, 16, v18
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v147, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v96, 16, v15
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(32)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v31
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v31, 16, v17
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v97, v98, v97, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(31)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v32
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v98, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v16
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v99, v100, v99, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(29)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v34
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v100, 16, v33
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v34, v102, v101, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(28)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v35
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v35, v112, v103, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(27)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v36
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v36, v114, v113, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(26)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v37
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v37, v116, v115, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(25)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v38
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v38, v118, v117, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(24)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v39
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v39, v128, v119, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(23)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v48
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v48, v130, v129, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(22)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v49
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v49, v132, v131, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(21)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v50
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v50, v134, v133, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(20)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v51
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v51, v144, v135, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(19)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v52
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v52, v146, v145, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(18)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v53
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v31, v31, v147, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(17)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v54
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v32, v32, v98, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(16)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v55
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v15, v33, v15, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(15)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v64
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v14, v30, v14, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(14)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v65
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v14, v97, v14, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v13, v29, v13, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(13)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v66
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v12, v28, v12, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(12)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v67
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v12, v34, v12, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v11, v27, v11, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(11)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v68
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v10, v26, v10, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(10)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v69
+; GFX11-FAKE16-NEXT:    v_perm_b32 v13, v99, v13, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v10, v36, v10, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v9, v25, v9, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(9)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v70
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v8, v24, v8, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(8)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v71
+; GFX11-FAKE16-NEXT:    v_perm_b32 v11, v35, v11, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v8, v38, v8, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v7, v23, v7, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(7)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v80
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v6, v22, v6, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(6)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v81
+; GFX11-FAKE16-NEXT:    v_perm_b32 v9, v37, v9, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v6, v48, v6, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v5, v21, v5, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(5)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v82
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v4, v20, v4, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(4)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v83
+; GFX11-FAKE16-NEXT:    v_perm_b32 v7, v39, v7, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v4, v50, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v3, v19, v3, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(3)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v84
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v1, v17, v1, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v85
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v0, v16, v0, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v86
+; GFX11-FAKE16-NEXT:    v_perm_b32 v5, v49, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v32, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v2, v18, v2, vcc_lo
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v87
+; GFX11-FAKE16-NEXT:    v_perm_b32 v3, v51, v3, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v2, v52, v2, 0x5040100
+; GFX11-FAKE16-NEXT:    v_cndmask_b32_e32 v16, v100, v96, vcc_lo
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v31, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v15, v16, v15, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <32 x i32> %cond, zeroinitializer
   %select = select <32 x i1> %cmp, <32 x half> %a, <32 x half> %b
   ret <32 x half> %select

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
index f3be926f76bef..ce31f2a74d16a 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
@@ -2,7 +2,8 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s
 ; FIXME: promotion not handled without f16 insts
 
 define half @v_constained_fadd_f16_fpexcept_strict(half %x, half %y) #0 {
@@ -12,11 +13,23 @@ define half @v_constained_fadd_f16_fpexcept_strict(half %x, half %y) #0 {
 ; GCN-NEXT:    v_add_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_strict:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constained_fadd_f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constained_fadd_f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fadd_f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -28,11 +41,23 @@ define half @v_constained_fadd_f16_fpexcept_ignore(half %x, half %y) #0 {
 ; GCN-NEXT:    v_add_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_ignore:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constained_fadd_f16_fpexcept_ignore:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constained_fadd_f16_fpexcept_ignore:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fadd_f16_fpexcept_ignore:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
   ret half %val
 }
@@ -44,11 +69,23 @@ define half @v_constained_fadd_f16_fpexcept_maytrap(half %x, half %y) #0 {
 ; GCN-NEXT:    v_add_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fadd_f16_fpexcept_maytrap:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constained_fadd_f16_fpexcept_maytrap:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constained_fadd_f16_fpexcept_maytrap:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fadd_f16_fpexcept_maytrap:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
   ret half %val
 }
@@ -142,12 +179,26 @@ define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX8-NEXT:    v_add_f16_e32 v1, v1, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_pk_add_f16 v0, v0, v2
-; GFX10PLUS-NEXT:    v_add_f16_e32 v1, v1, v3
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX10-NEXT:    v_add_f16_e32 v1, v1, v3
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v1.l, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fadd_v3f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v1, v1, v3
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call <3 x half> @llvm.experimental.constrained.fadd.v3f16(<3 x half> %x, <3 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret <3 x half> %val
 }
@@ -188,20 +239,33 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX10-NEXT:    v_perm_b32 v1, v4, v1, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
-; GFX11-NEXT:    v_add_f16_e32 v1, v1, v3
-; GFX11-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX11-NEXT:    v_add_f16_e32 v2, v6, v5
-; GFX11-NEXT:    v_add_f16_e32 v3, v7, v4
-; GFX11-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v1.l, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.h, v6.l, v5.l
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v1.h, v7.l, v4.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v1, v1, v3
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, v0, v2
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v2, v6, v5
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v3, v7, v4
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call <4 x half> @llvm.experimental.constrained.fadd.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret <4 x half> %val
 }
@@ -213,10 +277,20 @@ define amdgpu_ps half @s_constained_fadd_f16_fpexcept_strict(half inreg %x, half
 ; GCN-NEXT:    v_add_f16_e32 v0, s2, v0
 ; GCN-NEXT:    ; return to shader part epilog
 ;
-; GFX10PLUS-LABEL: s_constained_fadd_f16_fpexcept_strict:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    v_add_f16_e64 v0, s2, s3
-; GFX10PLUS-NEXT:    ; return to shader part epilog
+; GFX10-LABEL: s_constained_fadd_f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, s3
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: s_constained_fadd_f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    v_add_f16_e64 v0.l, s2, s3
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-FAKE16-LABEL: s_constained_fadd_f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    v_add_f16_e64 v0, s2, s3
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
   %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -255,3 +329,5 @@ declare <4 x half> @llvm.experimental.constrained.fadd.v4f16(<4 x half>, <4 x ha
 
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
index 407bb002483ec..ccd21b74f49bd 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
@@ -2,7 +2,8 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10 %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
 
 define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
 ; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:
@@ -17,11 +18,17 @@ define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0
 ; GFX10-NEXT:    v_fma_f16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f16 v0, v0, v1, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constained_fma_f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_fma_f16 v0.l, v0.l, v1.l, v2.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fma_f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_fma_f16 v0, v0, v1, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -88,12 +95,20 @@ define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x ha
 ; GFX10-NEXT:    v_fma_f16 v1, v1, v3, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constained_fma_v3f16_fpexcept_strict:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
-; GFX11-NEXT:    v_fma_f16 v1, v1, v3, v5
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constained_fma_v3f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_fmac_f16_e32 v5.l, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, v5
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fma_v3f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
+; GFX11-FAKE16-NEXT:    v_fma_f16 v1, v1, v3, v5
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret <3 x half> %val
 }
@@ -153,22 +168,40 @@ define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x ha
 ; GFX10-NEXT:    v_perm_b32 v0, v9, v4, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constained_fma_v4f16_fpexcept_strict:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
-; GFX11-NEXT:    v_fmac_f16_e32 v4, v0, v2
-; GFX11-NEXT:    v_fmac_f16_e32 v6, v8, v7
-; GFX11-NEXT:    v_fmac_f16_e32 v5, v1, v3
-; GFX11-NEXT:    v_fmac_f16_e32 v9, v11, v10
-; GFX11-NEXT:    v_perm_b32 v1, v6, v5, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v0, v9, v4, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constained_fma_v4f16_fpexcept_strict:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v2
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v1
+; GFX11-TRUE16-NEXT:    v_fmac_f16_e32 v5.l, v1.l, v3.l
+; GFX11-TRUE16-NEXT:    v_fmac_f16_e32 v4.l, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    v_fmac_f16_e32 v8.l, v10.l, v9.l
+; GFX11-TRUE16-NEXT:    v_fmac_f16_e32 v6.l, v11.l, v7.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v4.h, v8.l
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v5.h, v6.l
+; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fma_v4f16_fpexcept_strict:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
+; GFX11-FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v0
+; GFX11-FAKE16-NEXT:    v_fmac_f16_e32 v4, v0, v2
+; GFX11-FAKE16-NEXT:    v_fmac_f16_e32 v6, v8, v7
+; GFX11-FAKE16-NEXT:    v_fmac_f16_e32 v5, v1, v3
+; GFX11-FAKE16-NEXT:    v_fmac_f16_e32 v9, v11, v10
+; GFX11-FAKE16-NEXT:    v_perm_b32 v1, v6, v5, 0x5040100
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v9, v4, 0x5040100
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret <4 x half> %val
 }
@@ -186,11 +219,17 @@ define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z
 ; GFX10-NEXT:    v_fma_f16 v0, v0, v1, -v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f16 v0, v0, v1, -v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_fma_f16 v0.l, v0.l, v1.l, -v2.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_fma_f16 v0, v0, v1, -v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %neg.z = fneg half %z
   %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
@@ -209,11 +248,17 @@ define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, ha
 ; GFX10-NEXT:    v_fma_f16 v0, -v0, -v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f16 v0, -v0, -v1, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_fma_f16 v0.l, -v0.l, -v1.l, v2.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_fma_f16 v0, -v0, -v1, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %neg.x = fneg half %x
   %neg.y = fneg half %y
   %val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
@@ -233,11 +278,17 @@ define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, ha
 ; GFX10-NEXT:    v_fma_f16 v0, |v0|, |v1|, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_fma_f16 v0, |v0|, |v1|, v2
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_fma_f16 v0.l, |v0.l|, |v1.l|, v2.l
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_fma_f16 v0, |v0|, |v1|, v2
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %neg.x = call half @llvm.fabs.f16(half %x) #0
   %neg.y = call half @llvm.fabs.f16(half %y) #0
   %val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
index d798166a67839..b2ebae84a961c 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
@@ -8,8 +8,10 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s
 
-; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX1-GISEL,GFX1-GISEL-FAKE16 %s
 
 
 ; FIXME: promotion not handled without f16 insts
@@ -21,11 +23,35 @@ define half @v_constained_fmul_f16_fpexcept_strict(half %x, half %y) #0 {
 ; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_strict:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constained_fmul_f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fmul_f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fmul_f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1-GISEL-TRUE16-LABEL: v_constained_fmul_f16_fpexcept_strict:
+; GFX1-GISEL-TRUE16:       ; %bb.0:
+; GFX1-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1-GISEL-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX1-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1-GISEL-FAKE16-LABEL: v_constained_fmul_f16_fpexcept_strict:
+; GFX1-GISEL-FAKE16:       ; %bb.0:
+; GFX1-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1-GISEL-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX1-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -37,11 +63,35 @@ define half @v_constained_fmul_f16_fpexcept_ignore(half %x, half %y) #0 {
 ; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_ignore:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constained_fmul_f16_fpexcept_ignore:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fmul_f16_fpexcept_ignore:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fmul_f16_fpexcept_ignore:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1-GISEL-TRUE16-LABEL: v_constained_fmul_f16_fpexcept_ignore:
+; GFX1-GISEL-TRUE16:       ; %bb.0:
+; GFX1-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1-GISEL-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX1-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1-GISEL-FAKE16-LABEL: v_constained_fmul_f16_fpexcept_ignore:
+; GFX1-GISEL-FAKE16:       ; %bb.0:
+; GFX1-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1-GISEL-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX1-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
   ret half %val
 }
@@ -53,11 +103,35 @@ define half @v_constained_fmul_f16_fpexcept_maytrap(half %x, half %y) #0 {
 ; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1-GISEL-TRUE16-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
+; GFX1-GISEL-TRUE16:       ; %bb.0:
+; GFX1-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1-GISEL-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX1-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX1-GISEL-FAKE16-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
+; GFX1-GISEL-FAKE16:       ; %bb.0:
+; GFX1-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1-GISEL-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX1-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
   ret half %val
 }
@@ -205,12 +279,19 @@ define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX10-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_pk_mul_f16 v0, v0, v2
-; GFX11-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_pk_mul_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
 ; GFX1-GISEL:       ; %bb.0:
@@ -283,20 +364,33 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX10-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
-; GFX11-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
-; GFX11-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT:    v_mul_f16_e32 v2, v6, v5
-; GFX11-SDAG-NEXT:    v_mul_f16_e32 v3, v7, v4
-; GFX11-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-SDAG-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v0.h, v6.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e32 v1.h, v7.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v2, v6, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e32 v3, v7, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
 ; GFX1-GISEL:       ; %bb.0:
@@ -315,10 +409,30 @@ define amdgpu_ps half @s_constained_fmul_f16_fpexcept_strict(half inreg %x, half
 ; GCN-NEXT:    v_mul_f16_e32 v0, s2, v0
 ; GCN-NEXT:    ; return to shader part epilog
 ;
-; GFX10PLUS-LABEL: s_constained_fmul_f16_fpexcept_strict:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    v_mul_f16_e64 v0, s2, s3
-; GFX10PLUS-NEXT:    ; return to shader part epilog
+; GFX10-LABEL: s_constained_fmul_f16_fpexcept_strict:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_mul_f16_e64 v0, s2, s3
+; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX11-SDAG-TRUE16-LABEL: s_constained_fmul_f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    v_mul_f16_e64 v0.l, s2, s3
+; GFX11-SDAG-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-SDAG-FAKE16-LABEL: s_constained_fmul_f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    v_mul_f16_e64 v0, s2, s3
+; GFX11-SDAG-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX1-GISEL-TRUE16-LABEL: s_constained_fmul_f16_fpexcept_strict:
+; GFX1-GISEL-TRUE16:       ; %bb.0:
+; GFX1-GISEL-TRUE16-NEXT:    v_mul_f16_e64 v0.l, s2, s3
+; GFX1-GISEL-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX1-GISEL-FAKE16-LABEL: s_constained_fmul_f16_fpexcept_strict:
+; GFX1-GISEL-FAKE16:       ; %bb.0:
+; GFX1-GISEL-FAKE16-NEXT:    v_mul_f16_e64 v0, s2, s3
+; GFX1-GISEL-FAKE16-NEXT:    ; return to shader part epilog
   %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -370,6 +484,6 @@ declare <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half>, <4 x ha
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10: {{.*}}
 ; GFX11: {{.*}}
+; GFX11-SDAG: {{.*}}
 ; GFX8: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index 3420596da2aac..d6c5c937fd83e 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -5,11 +5,13 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
 
-; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-GISEL %s
 
-; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10PLUS-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-SDAG-FAKE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-GISEL-FAKE16 %s
 
 ; FIXME: promotion not handled without f16 insts
 
@@ -20,11 +22,41 @@ define half @v_constained_fsub_f16_fpexcept_strict(half %x, half %y) #0 {
 ; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_strict:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_constained_fsub_f16_fpexcept_strict:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_constained_fsub_f16_fpexcept_strict:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: v_constained_fsub_f16_fpexcept_strict:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: v_constained_fsub_f16_fpexcept_strict:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -36,11 +68,41 @@ define half @v_constained_fsub_f16_fpexcept_ignore(half %x, half %y) #0 {
 ; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_ignore:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_constained_fsub_f16_fpexcept_ignore:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_constained_fsub_f16_fpexcept_ignore:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_f16_fpexcept_ignore:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_f16_fpexcept_ignore:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: v_constained_fsub_f16_fpexcept_ignore:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: v_constained_fsub_f16_fpexcept_ignore:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
   ret half %val
 }
@@ -52,11 +114,41 @@ define half @v_constained_fsub_f16_fpexcept_maytrap(half %x, half %y) #0 {
 ; GCN-NEXT:    v_sub_f16_e32 v0, v0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX10PLUS-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: v_constained_fsub_f16_fpexcept_maytrap:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
   ret half %val
 }
@@ -108,6 +200,30 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
 ; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v2, v3, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
 ; GFX10PLUS-SDAG:       ; %bb.0:
 ; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -117,7 +233,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
 ; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v3, v2
 ; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
 ; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
 ; GFX10PLUS-GISEL:       ; %bb.0:
 ; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -174,6 +289,30 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
 ; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v2, v3, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
 ; GFX10PLUS-SDAG:       ; %bb.0:
 ; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -183,7 +322,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
 ; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v3, v2
 ; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
 ; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
 ; GFX10PLUS-GISEL:       ; %bb.0:
 ; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -240,6 +378,30 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
 ; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v2, v3, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
 ; GFX10PLUS-SDAG:       ; %bb.0:
 ; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -249,7 +411,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
 ; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v3, v2
 ; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
 ; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
 ; GFX10PLUS-GISEL:       ; %bb.0:
 ; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -316,6 +477,46 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX10-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v5.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v2, v5, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v1.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v0.h, v2.h
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v2, v4, v5
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
 ; GFX10PLUS-SDAG:       ; %bb.0:
 ; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -326,7 +527,6 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e32 v2, v5, v4
 ; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
 ; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
 ; GFX10PLUS-GISEL:       ; %bb.0:
 ; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -413,6 +613,59 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX10-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
 ; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
+; GFX11-SDAG-TRUE16-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v6.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v1.h, v7.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v2, v6, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e32 v3, v7, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v0.h, v2.h
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v1.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v1.h, v1.h, v3.h
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v2, v4, v6
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e32 v3, v5, v7
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ; GFX10PLUS-SDAG-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
 ; GFX10PLUS-SDAG:       ; %bb.0:
 ; GFX10PLUS-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -427,7 +680,6 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
 ; GFX10PLUS-SDAG-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
 ; GFX10PLUS-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
 ; GFX10PLUS-GISEL-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
 ; GFX10PLUS-GISEL:       ; %bb.0:
 ; GFX10PLUS-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -455,10 +707,35 @@ define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half
 ; GCN-NEXT:    v_sub_f16_e32 v0, s2, v0
 ; GCN-NEXT:    ; return to shader part epilog
 ;
-; GFX10PLUS-LABEL: s_constained_fsub_f16_fpexcept_strict:
-; GFX10PLUS:       ; %bb.0:
-; GFX10PLUS-NEXT:    v_sub_f16_e64 v0, s2, s3
-; GFX10PLUS-NEXT:    ; return to shader part epilog
+; GFX10-SDAG-LABEL: s_constained_fsub_f16_fpexcept_strict:
+; GFX10-SDAG:       ; %bb.0:
+; GFX10-SDAG-NEXT:    v_sub_f16_e64 v0, s2, s3
+; GFX10-SDAG-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_constained_fsub_f16_fpexcept_strict:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_sub_f16_e64 v0, s2, s3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX11-SDAG-TRUE16-LABEL: s_constained_fsub_f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e64 v0.l, s2, s3
+; GFX11-SDAG-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-SDAG-FAKE16-LABEL: s_constained_fsub_f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e64 v0, s2, s3
+; GFX11-SDAG-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-TRUE16-LABEL: s_constained_fsub_f16_fpexcept_strict:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e64 v0.l, s2, s3
+; GFX11-GISEL-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-FAKE16-LABEL: s_constained_fsub_f16_fpexcept_strict:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    v_sub_f16_e64 v0, s2, s3
+; GFX11-GISEL-FAKE16-NEXT:    ; return to shader part epilog
   %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
   ret half %val
 }
@@ -523,6 +800,35 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
 ; GFX10-GISEL-NEXT:    v_pk_add_f16 v0, s2, s0
 ; GFX10-GISEL-NEXT:    ; return to shader part epilog
 ;
+; GFX11-SDAG-TRUE16-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.h, s3
+; GFX11-SDAG-TRUE16-NEXT:    s_lshr_b32 s0, s2, 16
+; GFX11-SDAG-TRUE16-NEXT:    s_lshr_b32 s1, s3, 16
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.l, s0
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.h, s1
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v1.l, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-SDAG-FAKE16-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e64 v0, s2, s3
+; GFX11-SDAG-FAKE16-NEXT:    s_lshr_b32 s0, s3, 16
+; GFX11-SDAG-FAKE16-NEXT:    s_lshr_b32 s1, s2, 16
+; GFX11-SDAG-FAKE16-NEXT:    v_sub_f16_e64 v1, s1, s0
+; GFX11-SDAG-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
+; GFX11-GISEL:       ; %bb.0:
+; GFX11-GISEL-NEXT:    s_xor_b32 s0, s3, 0x80008000
+; GFX11-GISEL-NEXT:    v_pk_add_f16 v0, s2, s0
+; GFX11-GISEL-NEXT:    ; return to shader part epilog
 ; GFX10PLUS-SDAG-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
 ; GFX10PLUS-SDAG:       ; %bb.0:
 ; GFX10PLUS-SDAG-NEXT:    v_sub_f16_e64 v0, s2, s3
@@ -532,7 +838,6 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
 ; GFX10PLUS-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX10PLUS-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GFX10PLUS-SDAG-NEXT:    ; return to shader part epilog
-;
 ; GFX10PLUS-GISEL-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
 ; GFX10PLUS-GISEL:       ; %bb.0:
 ; GFX10PLUS-GISEL-NEXT:    s_xor_b32 s0, s3, 0x80008000

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
index 1f3f17c3e0c46..c46e3a08a6a0c 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
@@ -3,12 +3,14 @@
 ; XUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
 
 ; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
 
 ; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) #0 {
 ;   %result = call half @llvm.experimental.constrained.ldexp.f16.i16(half %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
@@ -34,14 +36,23 @@ define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 {
 ; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v2, v0
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-GISEL-LABEL: test_ldexp_f16_i32:
 ; GFX8-GISEL:       ; %bb.0:
@@ -61,14 +72,23 @@ define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 {
 ; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v3, v0
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v0, 0xffff8000, v3, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v0, 0xffff8000, v3, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v2, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret half %result
 }
@@ -104,19 +124,31 @@ define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a,
 ; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v1, s4
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SDAG-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v1, v4, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
-; GFX11-SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v0, v4, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v1, v4, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v2, v0
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v3, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i32:
 ; GFX8-GISEL:       ; %bb.0:
@@ -142,21 +174,33 @@ define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a,
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v3
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v3, v0
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v4, v0
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v2, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v3, v0
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v3, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v3, 0xffff8000, v4, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v2.h, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v1, 0xffff8000, v3, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v0, 0xffff8000, v4, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v2, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v3, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <2 x half> %result
 }
@@ -191,22 +235,37 @@ define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a,
 ; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SDAG-NEXT:    v_med3_i32 v0, v4, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v1, v5, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
-; GFX11-SDAG-NEXT:    v_med3_i32 v2, v6, s0, 0x7fff
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v4, v1
-; GFX11-SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v2
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v0, v5, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v5, v6, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v3.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v0, v4, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v1, v5, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v2, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v2, v6, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v4, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v3, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i32:
 ; GFX8-GISEL:       ; %bb.0:
@@ -236,23 +295,37 @@ define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a,
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v4
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v4, v0
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-GISEL-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v2, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v4, v5
-; GFX11-GISEL-NEXT:    v_med3_i32 v4, 0xffff8000, v6, v0
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v3, v4
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v6, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v3.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v4.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v2.h, v5.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v1, 0xffff8000, v4, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v2, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v4, v5
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v4, 0xffff8000, v6, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v3, v4
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <3 x half> %result
 }
@@ -293,26 +366,44 @@ define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a,
 ; GFX9-SDAG-NEXT:    v_perm_b32 v1, v3, v1, s4
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-SDAG-NEXT:    v_med3_i32 v0, v6, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v1, v7, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v3, v3, v0
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v4
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v2, v6, v5
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v7, v1
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
-; GFX11-SDAG-NEXT:    v_perm_b32 v1, v1, v3, 0x5040100
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v0, v7, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v6, v6, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.h, v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v7.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v3.l, v6.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v0, v6, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v1, v7, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v3, v3, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v2, v4
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v6, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v7, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    v_perm_b32 v1, v1, v3, 0x5040100
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-GISEL-LABEL: test_ldexp_v4f16_v4i32:
 ; GFX8-GISEL:       ; %bb.0:
@@ -348,30 +439,47 @@ define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a,
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v5
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v0
-; GFX11-GISEL-NEXT:    v_med3_i32 v6, 0xffff8000, v6, v0
-; GFX11-GISEL-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
-; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v7, v0
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v2, v4
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v3, v3, v6
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v1, v5
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v4, v8, v0
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v1, v4, 16, v2
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v4, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v4, 0xffff8000, v5, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v5, 0xffff8000, v6, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v6, 0xffff8000, v7, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v2.h, v4.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v3.l, v5.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.h, v3.h, v6.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v6, 0xffff8000, v6, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v0, 0xffff8000, v7, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v2, v4
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v3, v3, v6
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v5
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v4, v8, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v2
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v2, 0xffff, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v1, v4, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <4 x half> %result
 }
@@ -388,5 +496,7 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessib
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
 ; GFX11: {{.*}}
+; GFX11-GISEL: {{.*}}
+; GFX11-SDAG: {{.*}}
 ; GFX8: {{.*}}
 ; GFX9: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index c9547e2c68c82..5ec19a54403e0 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -2,7 +2,8 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GCN,GFX9
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GCN,VI
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX10
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX11
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
 define amdgpu_kernel void @v_test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
@@ -785,28 +786,53 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(ptr addrspace(1) %out,
 ; GFX10-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GFX10-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: v_test_sub_v2i16_zext_to_v2i64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_clause 0x1
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    global_load_b32 v0, v0, s[4:5] glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s2, -1
-; GFX11-NEXT:    v_pk_sub_i16 v0, v1, v0
-; GFX11-NEXT:    v_mov_b32_e32 v1, 0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_alignbit_b32 v2, 0, v0, 16
-; GFX11-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
-; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: v_test_sub_v2i16_zext_to_v2i64:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_clause 0x1
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    global_load_b32 v0, v0, s[4:5] glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-TRUE16-NEXT:    v_pk_sub_i16 v0, v1, v0
+; GFX11-TRUE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-TRUE16-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_lshl_or_b32 v2, 0, 16, v2
+; GFX11-TRUE16-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: v_test_sub_v2i16_zext_to_v2i64:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    global_load_b32 v0, v0, s[4:5] glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s2, -1
+; GFX11-FAKE16-NEXT:    v_pk_sub_i16 v0, v1, v0
+; GFX11-FAKE16-NEXT:    v_mov_b32_e32 v1, 0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_alignbit_b32 v2, 0, v0, 16
+; GFX11-FAKE16-NEXT:    v_dual_mov_b32 v3, v1 :: v_dual_and_b32 v0, 0xffff, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep.out = getelementptr inbounds <2 x i64>, ptr addrspace(1) %out, i32 %tid
   %gep.in0 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in0, i32 %tid

diff  --git a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
index ee16dad2d7d11..580938f922a04 100644
--- a/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_madak_f16.ll
@@ -1,7 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
-; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+real-true16 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-TRUE16
+; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-real-true16 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX11-FAKE16
 
 define amdgpu_kernel void @madak_f16(
 ; SI-LABEL: madak_f16:
@@ -52,30 +53,55 @@ define amdgpu_kernel void @madak_f16(
 ; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: madak_f16:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_clause 0x1
-; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; GFX11-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
-; GFX11-NEXT:    s_mov_b32 s10, -1
-; GFX11-NEXT:    s_mov_b32 s11, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s14, s10
-; GFX11-NEXT:    s_mov_b32 s15, s11
-; GFX11-NEXT:    s_mov_b32 s6, s10
-; GFX11-NEXT:    s_mov_b32 s7, s11
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s12, s2
-; GFX11-NEXT:    s_mov_b32 s13, s3
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[4:7], 0
-; GFX11-NEXT:    s_mov_b32 s8, s0
-; GFX11-NEXT:    s_mov_b32 s9, s1
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_add_f16_e32 v0, 0x4900, v0
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: madak_f16:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_clause 0x1
+; GFX11-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-TRUE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-TRUE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-TRUE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, 0x4900, v0.l
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: madak_f16:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-FAKE16-NEXT:    s_mov_b32 s10, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s11, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s14, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s15, s11
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, s11
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s12, s2
+; GFX11-FAKE16-NEXT:    s_mov_b32 s13, s3
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[12:15], 0
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s8, s0
+; GFX11-FAKE16-NEXT:    s_mov_b32 s9, s1
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 0x4900, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
     ptr addrspace(1) %b) #0 {
@@ -164,42 +190,79 @@ define amdgpu_kernel void @madak_f16_use_2(
 ; VI-NEXT:    buffer_store_short v3, off, s[4:7], 0
 ; VI-NEXT:    s_endpgm
 ;
-; GFX11-LABEL: madak_f16_use_2:
-; GFX11:       ; %bb.0: ; %entry
-; GFX11-NEXT:    s_clause 0x1
-; GFX11-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
-; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
-; GFX11-NEXT:    s_mov_b32 s6, -1
-; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX11-NEXT:    s_mov_b32 s18, s6
-; GFX11-NEXT:    s_mov_b32 s19, s7
-; GFX11-NEXT:    s_mov_b32 s22, s6
-; GFX11-NEXT:    s_mov_b32 s23, s7
-; GFX11-NEXT:    s_mov_b32 s2, s6
-; GFX11-NEXT:    s_mov_b32 s3, s7
-; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s16, s12
-; GFX11-NEXT:    s_mov_b32 s17, s13
-; GFX11-NEXT:    s_mov_b32 s20, s14
-; GFX11-NEXT:    s_mov_b32 s21, s15
-; GFX11-NEXT:    buffer_load_u16 v0, off, s[16:19], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    buffer_load_u16 v2, off, s[0:3], 0 glc dlc
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    s_mov_b32 s4, s8
-; GFX11-NEXT:    s_mov_b32 s5, s9
-; GFX11-NEXT:    s_mov_b32 s0, s10
-; GFX11-NEXT:    s_mov_b32 s1, s11
-; GFX11-NEXT:    v_mul_f16_e32 v1, v0, v1
-; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_add_f16_e32 v1, 0x4900, v1
-; GFX11-NEXT:    v_add_f16_e32 v0, 0x4900, v0
-; GFX11-NEXT:    buffer_store_b16 v1, off, s[4:7], 0
-; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
-; GFX11-NEXT:    s_endpgm
+; GFX11-TRUE16-LABEL: madak_f16_use_2:
+; GFX11-TRUE16:       ; %bb.0: ; %entry
+; GFX11-TRUE16-NEXT:    s_clause 0x1
+; GFX11-TRUE16-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
+; GFX11-TRUE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX11-TRUE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-TRUE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-TRUE16-NEXT:    s_mov_b32 s18, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s19, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s22, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s23, s7
+; GFX11-TRUE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-TRUE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-TRUE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s16, s12
+; GFX11-TRUE16-NEXT:    s_mov_b32 s17, s13
+; GFX11-TRUE16-NEXT:    s_mov_b32 s20, s14
+; GFX11-TRUE16-NEXT:    s_mov_b32 s21, s15
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v0, off, s[16:19], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    buffer_load_u16 v2, off, s[0:3], 0 glc dlc
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-TRUE16-NEXT:    s_mov_b32 s4, s8
+; GFX11-TRUE16-NEXT:    s_mov_b32 s5, s9
+; GFX11-TRUE16-NEXT:    s_mov_b32 s0, s10
+; GFX11-TRUE16-NEXT:    s_mov_b32 s1, s11
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.h, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    v_mul_f16_e32 v0.l, v0.l, v2.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v1.l, 0x4900, v0.h
+; GFX11-TRUE16-NEXT:    v_add_f16_e32 v0.l, 0x4900, v0.l
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v1, off, s[4:7], 0
+; GFX11-TRUE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-TRUE16-NEXT:    s_endpgm
+;
+; GFX11-FAKE16-LABEL: madak_f16_use_2:
+; GFX11-FAKE16:       ; %bb.0: ; %entry
+; GFX11-FAKE16-NEXT:    s_clause 0x1
+; GFX11-FAKE16-NEXT:    s_load_b256 s[8:15], s[4:5], 0x24
+; GFX11-FAKE16-NEXT:    s_load_b64 s[0:1], s[4:5], 0x44
+; GFX11-FAKE16-NEXT:    s_mov_b32 s6, -1
+; GFX11-FAKE16-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX11-FAKE16-NEXT:    s_mov_b32 s18, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s19, s7
+; GFX11-FAKE16-NEXT:    s_mov_b32 s22, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s23, s7
+; GFX11-FAKE16-NEXT:    s_mov_b32 s2, s6
+; GFX11-FAKE16-NEXT:    s_mov_b32 s3, s7
+; GFX11-FAKE16-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s16, s12
+; GFX11-FAKE16-NEXT:    s_mov_b32 s17, s13
+; GFX11-FAKE16-NEXT:    s_mov_b32 s20, s14
+; GFX11-FAKE16-NEXT:    s_mov_b32 s21, s15
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v0, off, s[16:19], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v1, off, s[20:23], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    buffer_load_u16 v2, off, s[0:3], 0 glc dlc
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-FAKE16-NEXT:    s_mov_b32 s4, s8
+; GFX11-FAKE16-NEXT:    s_mov_b32 s5, s9
+; GFX11-FAKE16-NEXT:    s_mov_b32 s0, s10
+; GFX11-FAKE16-NEXT:    s_mov_b32 s1, s11
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v1, v0, v1
+; GFX11-FAKE16-NEXT:    v_mul_f16_e32 v0, v0, v2
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v1, 0x4900, v1
+; GFX11-FAKE16-NEXT:    v_add_f16_e32 v0, 0x4900, v0
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v1, off, s[4:7], 0
+; GFX11-FAKE16-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-FAKE16-NEXT:    s_endpgm
     ptr addrspace(1) %r0,
     ptr addrspace(1) %r1,
     ptr addrspace(1) %a,
@@ -221,3 +284,5 @@ entry:
 }
 
 attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index b81f5d0a19ba8..556c553cfd7d5 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -2,12 +2,14 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX9 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,SDAG-GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX12 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX12,SDAG-GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX12,SDAG-GFX12-FAKE16 %s
 
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX9 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1101 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GFX11,GISEL-GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX12 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX12,GISEL-GFX12-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -verify-machineinstrs -global-isel < %s | FileCheck -check-prefixes=GISEL-GFX12,GISEL-GFX12-FAKE16 %s
 
 ; <GFX9 has no V_SAT_PK, GFX9+ has V_SAT_PK, GFX11 has V_SAT_PK with t16
 
@@ -48,18 +50,29 @@ define <2 x i16> @basic_smax_smin(i16 %src0, i16 %src1) {
 ; SDAG-GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 ; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smax_smin:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; SDAG-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smax_smin:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smax_smin:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smax_smin:
 ; GISEL-VI:       ; %bb.0:
@@ -92,19 +105,30 @@ define <2 x i16> @basic_smax_smin(i16 %src0, i16 %src1) {
 ; GISEL-GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smax_smin:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GISEL-GFX12-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smax_smin:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smax_smin:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX12-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
   %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
@@ -158,18 +182,34 @@ define amdgpu_kernel void @basic_smax_smin_sgpr(ptr addrspace(1) %out, i32 inreg
 ; SDAG-GFX11-NEXT:    global_store_b32 v2, v0, s[0:1]
 ; SDAG-GFX11-NEXT:    s_endpgm
 ;
-; SDAG-GFX12-LABEL: basic_smax_smin_sgpr:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
-; SDAG-GFX12-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_med3_i16 v0, s2, 0, 0xff
-; SDAG-GFX12-NEXT:    v_med3_i16 v1, s3, 0, 0xff
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; SDAG-GFX12-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; SDAG-GFX12-NEXT:    global_store_b32 v2, v0, s[0:1]
-; SDAG-GFX12-NEXT:    s_endpgm
+; SDAG-GFX12-TRUE16-LABEL: basic_smax_smin_sgpr:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; SDAG-GFX12-TRUE16-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_mov_b16_e32 v0.l, s2
+; SDAG-GFX12-TRUE16-NEXT:    v_mov_b16_e32 v0.h, s3
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v1.l, v0.h, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SDAG-GFX12-TRUE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; SDAG-GFX12-TRUE16-NEXT:    global_store_b32 v2, v0, s[0:1]
+; SDAG-GFX12-TRUE16-NEXT:    s_endpgm
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smax_smin_sgpr:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
+; SDAG-GFX12-FAKE16-NEXT:    v_mov_b32_e32 v2, 0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v0, s2, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v1, s3, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SDAG-GFX12-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; SDAG-GFX12-FAKE16-NEXT:    global_store_b32 v2, v0, s[0:1]
+; SDAG-GFX12-FAKE16-NEXT:    s_endpgm
 ;
 ; GISEL-VI-LABEL: basic_smax_smin_sgpr:
 ; GISEL-VI:       ; %bb.0:
@@ -300,18 +340,29 @@ define <2 x i16> @basic_smin_smax(i16 %src0, i16 %src1) {
 ; SDAG-GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 ; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smin_smax:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; SDAG-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smin_smax:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smin_smax:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smin_smax:
 ; GISEL-VI:       ; %bb.0:
@@ -344,19 +395,30 @@ define <2 x i16> @basic_smin_smax(i16 %src0, i16 %src1) {
 ; GISEL-GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smin_smax:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GISEL-GFX12-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smin_smax:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smin_smax:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX12-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %src0.min = call i16 @llvm.smin.i16(i16 %src0, i16 255)
   %src0.clamp = call i16 @llvm.smax.i16(i16 %src0.min, i16 0)
@@ -398,18 +460,29 @@ define <2 x i16> @basic_smin_smax_combined(i16 %src0, i16 %src1) {
 ; SDAG-GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 ; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smin_smax_combined:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; SDAG-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smin_smax_combined:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smin_smax_combined:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smin_smax_combined:
 ; GISEL-VI:       ; %bb.0:
@@ -442,19 +515,30 @@ define <2 x i16> @basic_smin_smax_combined(i16 %src0, i16 %src1) {
 ; GISEL-GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smin_smax_combined:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GISEL-GFX12-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smin_smax_combined:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smin_smax_combined:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GISEL-GFX12-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %src0.min = call i16 @llvm.smin.i16(i16 %src0, i16 255)
   %src0.clamp = call i16 @llvm.smax.i16(i16 %src0.min, i16 0)
@@ -825,19 +909,33 @@ define i16 @basic_smax_smin_bit_or(i16 %src0, i16 %src1) {
 ; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smax_smin_bit_or:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; SDAG-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smax_smin_bit_or:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smax_smin_bit_or:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smax_smin_bit_or:
 ; GISEL-VI:       ; %bb.0:
@@ -860,19 +958,33 @@ define i16 @basic_smax_smin_bit_or(i16 %src0, i16 %src1) {
 ; GISEL-GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smax_smin_bit_or:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; GISEL-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GISEL-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smax_smin_bit_or:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smax_smin_bit_or:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; GISEL-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
   %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
@@ -912,19 +1024,33 @@ define i16 @basic_umax_umin_bit_or(i16 %src0, i16 %src1) {
 ; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_umax_umin_bit_or:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_min_u16 v1, 0xff, v1
-; SDAG-GFX12-NEXT:    v_min_u16 v0, 0xff, v0
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_umax_umin_bit_or:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_min_u16 v0.h, 0xff, v1.l
+; SDAG-GFX12-TRUE16-NEXT:    v_min_u16 v0.l, 0xff, v0.l
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_umax_umin_bit_or:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_min_u16 v1, 0xff, v1
+; SDAG-GFX12-FAKE16-NEXT:    v_min_u16 v0, 0xff, v0
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_umax_umin_bit_or:
 ; GISEL-VI:       ; %bb.0:
@@ -944,19 +1070,33 @@ define i16 @basic_umax_umin_bit_or(i16 %src0, i16 %src1) {
 ; GISEL-GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_umax_umin_bit_or:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_min_u16 v1, 0xff, v1
-; GISEL-GFX12-NEXT:    v_min_u16 v0, 0xff, v0
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GISEL-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_umax_umin_bit_or:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_min_u16 v0.h, 0xff, v1.l
+; GISEL-GFX12-TRUE16-NEXT:    v_min_u16 v0.l, 0xff, v0.l
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_umax_umin_bit_or:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_min_u16 v1, 0xff, v1
+; GISEL-GFX12-FAKE16-NEXT:    v_min_u16 v0, 0xff, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; GISEL-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %src0.max = call i16 @llvm.umax.i16(i16 %src0, i16 0)
   %src0.clamp = call i16 @llvm.umin.i16(i16 %src0.max, i16 255)
@@ -1001,21 +1141,37 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
 ; SDAG-GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smax_smin_vec_cast:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; SDAG-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; SDAG-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG-GFX12-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smax_smin_vec_cast:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    v_and_b16 v0.l, 0xff, v0.l
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smax_smin_vec_cast:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; SDAG-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG-GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smax_smin_vec_cast:
 ; GISEL-VI:       ; %bb.0:
@@ -1048,19 +1204,33 @@ define i16 @basic_smax_smin_vec_cast(i16 %src0, i16 %src1) {
 ; GISEL-GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smax_smin_vec_cast:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_med3_i16 v1, v1, 0, 0xff
-; GISEL-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GISEL-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smax_smin_vec_cast:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.h, v1.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smax_smin_vec_cast:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v1, v1, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; GISEL-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
   %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
@@ -1103,19 +1273,33 @@ define i16 @basic_smax_smin_bit_shl(i16 %src0, i16 %src1) {
 ; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smax_smin_bit_shl:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_max_i16 v1, v1, 0
-; SDAG-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smax_smin_bit_shl:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_max_i16 v0.h, v1.l, 0
+; SDAG-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smax_smin_bit_shl:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_max_i16 v1, v1, 0
+; SDAG-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smax_smin_bit_shl:
 ; GISEL-VI:       ; %bb.0:
@@ -1137,19 +1321,33 @@ define i16 @basic_smax_smin_bit_shl(i16 %src0, i16 %src1) {
 ; GISEL-GFX9-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smax_smin_bit_shl:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_max_i16 v1, v1, 0
-; GISEL-GFX12-NEXT:    v_med3_i16 v0, v0, 0, 0xff
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GISEL-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smax_smin_bit_shl:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_max_i16 v0.h, v1.l, 0
+; GISEL-GFX12-TRUE16-NEXT:    v_med3_i16 v0.l, v0.l, 0, 0xff
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.h, 8, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.l, v0.h
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smax_smin_bit_shl:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_max_i16 v1, v1, 0
+; GISEL-GFX12-FAKE16-NEXT:    v_med3_i16 v0, v0, 0, 0xff
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; GISEL-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %src0.max = call i16 @llvm.smax.i16(i16 %src0, i16 0)
   %src0.clamp = call i16 @llvm.smin.i16(i16 %src0.max, i16 255)
@@ -1194,21 +1392,37 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
 ; SDAG-GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smax_smin_vec_input:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_pk_max_i16 v0, v0, 0
-; SDAG-GFX12-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smax_smin_vec_input:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_pk_max_i16 v1, v0, 0
+; SDAG-GFX12-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.l, 8, v0.l
+; SDAG-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v1.l, v0.l
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smax_smin_vec_input:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_pk_max_i16 v0, v0, 0
+; SDAG-GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smax_smin_vec_input:
 ; GISEL-VI:       ; %bb.0:
@@ -1249,24 +1463,42 @@ define i16 @basic_smax_smin_vec_input(<2 x i16> %src) {
 ; GISEL-GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smax_smin_vec_input:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_pk_min_i16 v0, 0xff00ff, v0
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_pk_max_i16 v0, 0, v0
-; GISEL-GFX12-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GISEL-GFX12-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GISEL-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smax_smin_vec_input:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_pk_min_i16 v0, 0xff00ff, v0
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_pk_max_i16 v1, 0, v0
+; GISEL-GFX12-TRUE16-NEXT:    v_and_b16 v0.l, 0xff, v1.h
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GISEL-GFX12-TRUE16-NEXT:    v_and_b16 v0.h, 0xff, v1.l
+; GISEL-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.l, 8, v0.l
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v0.h, v0.l
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smax_smin_vec_input:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_pk_min_i16 v0, 0xff00ff, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_pk_max_i16 v0, 0, v0
+; GISEL-GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GISEL-GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; GISEL-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %smin = call <2 x i16> @llvm.smin.v2i16(<2 x i16> <i16 255, i16 255>, <2 x i16> %src)
   %smed = call <2 x i16> @llvm.smax.v2i16(<2 x i16> <i16 0, i16 0>, <2 x i16> %smin)
@@ -1310,21 +1542,37 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
 ; SDAG-GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; SDAG-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; SDAG-GFX12-LABEL: basic_smax_smin_vec_input_rev:
-; SDAG-GFX12:       ; %bb.0:
-; SDAG-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_expcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_samplecnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; SDAG-GFX12-NEXT:    s_wait_kmcnt 0x0
-; SDAG-GFX12-NEXT:    v_pk_max_i16 v0, v0, 0
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
-; SDAG-GFX12-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; SDAG-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; SDAG-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; SDAG-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; SDAG-GFX12-TRUE16-LABEL: basic_smax_smin_vec_input_rev:
+; SDAG-GFX12-TRUE16:       ; %bb.0:
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-TRUE16-NEXT:    v_pk_max_i16 v0, v0, 0
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_pk_min_i16 v1, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX12-TRUE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
+; SDAG-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.l, 8, v0.l
+; SDAG-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v1.l, v0.l
+; SDAG-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; SDAG-GFX12-FAKE16-LABEL: basic_smax_smin_vec_input_rev:
+; SDAG-GFX12-FAKE16:       ; %bb.0:
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; SDAG-GFX12-FAKE16-NEXT:    v_pk_max_i16 v0, v0, 0
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; SDAG-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; SDAG-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; SDAG-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; SDAG-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GISEL-VI-LABEL: basic_smax_smin_vec_input_rev:
 ; GISEL-VI:       ; %bb.0:
@@ -1364,24 +1612,39 @@ define i16 @basic_smax_smin_vec_input_rev(<2 x i16> %src) {
 ; GISEL-GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GISEL-GFX11-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GISEL-GFX12-LABEL: basic_smax_smin_vec_input_rev:
-; GISEL-GFX12:       ; %bb.0:
-; GISEL-GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_expcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_samplecnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_bvhcnt 0x0
-; GISEL-GFX12-NEXT:    s_wait_kmcnt 0x0
-; GISEL-GFX12-NEXT:    v_pk_max_i16 v0, 0, v0
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_pk_min_i16 v0, 0xff00ff, v0
-; GISEL-GFX12-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GISEL-GFX12-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GISEL-GFX12-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GISEL-GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GISEL-GFX12-NEXT:    v_or_b32_e32 v0, v0, v1
-; GISEL-GFX12-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-GFX12-TRUE16-LABEL: basic_smax_smin_vec_input_rev:
+; GISEL-GFX12-TRUE16:       ; %bb.0:
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-TRUE16-NEXT:    v_pk_max_i16 v0, 0, v0
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_pk_min_i16 v1, 0xff00ff, v0
+; GISEL-GFX12-TRUE16-NEXT:    v_lshlrev_b16 v0.l, 8, v1.h
+; GISEL-GFX12-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX12-TRUE16-NEXT:    v_or_b16 v0.l, v1.l, v0.l
+; GISEL-GFX12-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-GFX12-FAKE16-LABEL: basic_smax_smin_vec_input_rev:
+; GISEL-GFX12-FAKE16:       ; %bb.0:
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_loadcnt_dscnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_expcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_samplecnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_bvhcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    s_wait_kmcnt 0x0
+; GISEL-GFX12-FAKE16-NEXT:    v_pk_max_i16 v0, 0, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_pk_min_i16 v0, 0xff00ff, v0
+; GISEL-GFX12-FAKE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GISEL-GFX12-FAKE16-NEXT:    v_and_b32_e32 v0, 0xff, v0
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_and_b32_e32 v1, 0xff, v1
+; GISEL-GFX12-FAKE16-NEXT:    v_lshlrev_b16 v1, 8, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX12-FAKE16-NEXT:    v_or_b32_e32 v0, v0, v1
+; GISEL-GFX12-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 
   %smax = call <2 x i16> @llvm.smax.v2i16(<2 x i16> <i16 0, i16 0>, <2 x i16> %src)
   %smed = call <2 x i16> @llvm.smin.v2i16(<2 x i16> <i16 255, i16 255>, <2 x i16> %smax)


        


More information about the llvm-commits mailing list