[llvm] a24fae3 - [AMDGPU][True16][GlobalISel] Fix v2*16 build_vector patterns (#151496)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 4 04:06:09 PDT 2025


Author: Mirko Brkušanin
Date: 2025-08-04T13:06:06+02:00
New Revision: a24fae3aefd14cfff4bd911101cd0a1fb4dfdafe

URL: https://github.com/llvm/llvm-project/commit/a24fae3aefd14cfff4bd911101cd0a1fb4dfdafe
DIFF: https://github.com/llvm/llvm-project/commit/a24fae3aefd14cfff4bd911101cd0a1fb4dfdafe.diff

LOG: [AMDGPU][True16][GlobalISel] Fix v2*16 build_vector patterns (#151496)

- Pattern with IMPLICIT_DEF failed to generate an entry in MatchTable and
did not report an error, just silently failed. This is fixed by casting
IMPLICIT_DEF to appropriate type. This also fixes selecting
"build_vector s16, undef" for GlobalISel with True16.
- Add pattern for "build_vector undef, s16" that will work for GlobalISel.
True16 GlobalISel has a G_TRUNC that it needs to deal with.
- Use REG_SEQUENCE for Real16 patterns instead of V_LSHLREV_B32_e64 to
generate more optimal code.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
    llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
    llvm/test/CodeGen/AMDGPU/fmaximum.ll
    llvm/test/CodeGen/AMDGPU/fminimum.ll
    llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
    llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
    llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
    llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 54fa192aeec92..bd5dfa92a8e43 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3543,14 +3543,21 @@ def : GCNPat <
   (vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
   (S_LSHL_B32 SReg_32:$src1, (i32 16))
 >;
-}
 
 def : GCNPat <
   (vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
   (vecTy (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
 >;
+} // End True16Predicate = ...
 } // End foreach Ty = ...
-}
+} // End AddedComplexity = 1
+
+let True16Predicate = UseRealTrue16Insts in
+def : GCNPat <
+  (v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))),
+  (REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16,
+                         (i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)), hi16)
+>;
 
 let SubtargetPredicate = HasVOP3PInsts in {
 foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
@@ -3599,7 +3606,11 @@ def : GCNPat <
 >;
 def : GCNPat <
   (vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$src0), (Ty undef))),
-  (REG_SEQUENCE VGPR_32, $src0, lo16, (IMPLICIT_DEF), hi16)
+  (REG_SEQUENCE VGPR_32, $src0, lo16, (Ty (IMPLICIT_DEF)), hi16)
+>;
+def : GCNPat <
+  (vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_16:$src1))),
+  (REG_SEQUENCE VGPR_32, (Ty (IMPLICIT_DEF)), lo16, (Ty VGPR_16:$src1), hi16)
 >;
 }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
index bdb52dbb95fa7..d1a1112777aae 100644
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@@ -1,8 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GFX8,GFX8-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16,GFX11-FAKE16-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16,GFX11-FAKE16-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16,GFX11-TRUE16-SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16,GFX11-TRUE16-GISEL %s
 
 define void @undef_lo_v2i16(i16 %arg0) {
+; GFX8-SDAG-LABEL: undef_lo_v2i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo_v2i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_lo_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -12,20 +37,48 @@ define void @undef_lo_v2i16(i16 %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo_v2i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_lo_v2i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v0
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: undef_lo_v2i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v0
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = insertelement <2 x i16> poison, i16 %arg0, i32 1
   call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.lo);
   ret void
 }
 
 define void @undef_lo_v2f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_lo_v2f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo_v2f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_lo_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -35,20 +88,52 @@ define void @undef_lo_v2f16(half %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo_v2f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_lo_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v0
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: undef_lo_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v0
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = insertelement <2 x half> poison, half %arg0, i32 1
   call void asm sideeffect "; use $0", "v"(<2 x half> %undef.lo);
   ret void
 }
 
 define void @undef_lo_op_v2f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_lo_op_v2f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, 0x3c00
+; GFX8-SDAG-NEXT:    v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, 0x7e00, v0
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo_op_v2f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3c00
+; GFX8-GISEL-NEXT:    v_add_f16_e64 v1, s4, 1.0
+; GFX8-GISEL-NEXT:    v_add_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_lo_op_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -59,16 +144,27 @@ define void @undef_lo_op_v2f16(half %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo_op_v2f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x3c00
-; GFX8-NEXT:    v_add_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v0, 0x7e00, v0
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_lo_op_v2f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT:    v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v0
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: undef_lo_op_v2f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT:    v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v0
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = insertelement <2 x half> poison, half %arg0, i32 1
   %op = fadd <2 x half> %undef.lo, <half 1.0, half 1.0>
   call void asm sideeffect "; use $0", "v"(<2 x half> %op);
@@ -76,26 +172,93 @@ define void @undef_lo_op_v2f16(half %arg0) {
 }
 
 define void @undef_lo_op_v2i16(i16 %arg0) {
-; GFX9-LABEL: undef_lo_op_v2i16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX9-NEXT:    s_movk_i32 s4, 0x63
-; GFX9-NEXT:    v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use v0
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, 0x63
+; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo_op_v2i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v1, 0x63
-; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x63
+; GFX8-GISEL-NEXT:    s_and_b32 s4, 0xffff, s4
+; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s4, v0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x63
+; GFX9-SDAG-NEXT:    v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT:    ;;#ASMSTART
+; GFX9-SDAG-NEXT:    ; use v0
+; GFX9-SDAG-NEXT:    ;;#ASMEND
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x630063
+; GFX9-GISEL-NEXT:    v_pk_add_u16 v0, v0, v1
+; GFX9-GISEL-NEXT:    ;;#ASMSTART
+; GFX9-GISEL-NEXT:    ; use v0
+; GFX9-GISEL-NEXT:    ;;#ASMEND
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX11-FAKE16-SDAG:       ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-SDAG-NEXT:    v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-FAKE16-SDAG-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-SDAG-NEXT:    ; use v0
+; GFX11-FAKE16-SDAG-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX11-FAKE16-GISEL:       ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-GISEL-NEXT:    v_pk_add_u16 v0, 0x630063, v0
+; GFX11-FAKE16-GISEL-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-GISEL-NEXT:    ; use v0
+; GFX11-FAKE16-GISEL-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_lo_op_v2i16:
+; GFX11-TRUE16-SDAG:       ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT:    v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT:    v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT:    ; use v0
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_lo_op_v2i16:
+; GFX11-TRUE16-GISEL:       ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT:    v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-GISEL-NEXT:    v_pk_add_u16 v0, 0x630063, v0
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT:    ; use v0
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = insertelement <2 x i16> poison, i16 %arg0, i32 1
   %op = add <2 x i16> %undef.lo, <i16 99, i16 99>
   call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
@@ -103,6 +266,26 @@ define void @undef_lo_op_v2i16(i16 %arg0) {
 }
 
 define void @undef_lo3_v4i16(i16 %arg0) {
+; GFX8-SDAG-LABEL: undef_lo3_v4i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo3_v4i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_lo3_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -112,20 +295,49 @@ define void @undef_lo3_v4i16(i16 %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo3_v4i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_lo3_v4i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v[0:1]
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: undef_lo3_v4i16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v[0:1]
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = insertelement <4 x i16> poison, i16 %arg0, i32 1
   call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
   ret void
 }
 
 define void @undef_lo3_v4f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_lo3_v4f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo3_v4f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_lo3_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -135,20 +347,50 @@ define void @undef_lo3_v4f16(half %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo3_v4f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_lo3_v4f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v[0:1]
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: undef_lo3_v4f16:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v0.l
+; GFX11-TRUE16-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-NEXT:    ; use v[0:1]
+; GFX11-TRUE16-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = insertelement <4 x half> poison, half %arg0, i32 1
   call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
   ret void
 }
 
 define void @undef_lo2_v4i16(<2 x i16> %arg0) {
+; GFX8-SDAG-LABEL: undef_lo2_v4i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-SDAG-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo2_v4i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-GISEL-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_lo2_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -159,21 +401,62 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo2_v4i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT:    v_alignbit_b32 v0, v1, v0, 16
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_lo2_v4i16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v0, v0, 0x7060302
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v[0:1]
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_lo2_v4i16:
+; GFX11-TRUE16-SDAG:       ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT:    v_mov_b16_e32 v1.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-TRUE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT:    ; use v[0:1]
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_lo2_v4i16:
+; GFX11-TRUE16-GISEL:       ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT:    ; use v[0:1]
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
   call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
   ret void
 }
 
 define void @undef_lo2_v4f16(<2 x half> %arg0) {
+; GFX8-SDAG-LABEL: undef_lo2_v4f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-SDAG-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_lo2_v4f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-GISEL-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_lo2_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -184,21 +467,57 @@ define void @undef_lo2_v4f16(<2 x half> %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_lo2_v4f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
-; GFX8-NEXT:    v_alignbit_b32 v0, v1, v0, 16
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-FAKE16-LABEL: undef_lo2_v4f16:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT:    v_perm_b32 v0, v0, v0, 0x7060302
+; GFX11-FAKE16-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-NEXT:    ; use v[0:1]
+; GFX11-FAKE16-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_lo2_v4f16:
+; GFX11-TRUE16-SDAG:       ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT:    v_mov_b16_e32 v1.l, v0.h
+; GFX11-TRUE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-SDAG-NEXT:    v_bfi_b32 v0, 0xffff, v1, v0
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT:    ; use v[0:1]
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_lo2_v4f16:
+; GFX11-TRUE16-GISEL:       ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT:    ; use v[0:1]
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %undef.lo = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
   call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
   ret void
 }
 
 define void @undef_hi_v2i16(i16 %arg0) {
+; GFX8-SDAG-LABEL: undef_hi_v2i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_hi_v2i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_hi_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -207,19 +526,36 @@ define void @undef_hi_v2i16(i16 %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi_v2i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: undef_hi_v2i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use v0
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = insertelement <2 x i16> poison, i16 %arg0, i32 0
   call void asm sideeffect "; use $0", "v"(<2 x i16> %undef.hi);
   ret void
 }
 
 define void @undef_hi_v2f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_hi_v2f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_hi_v2f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_hi_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -228,19 +564,42 @@ define void @undef_hi_v2f16(half %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi_v2f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: undef_hi_v2f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use v0
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = insertelement <2 x half> poison, half %arg0, i32 0
   call void asm sideeffect "; use $0", "v"(<2 x half> %undef.hi);
   ret void
 }
 
 define void @undef_hi_op_v2f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_hi_op_v2f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, 0x7e000000, v0
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_hi_op_v2f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s4
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, 0x3c00
+; GFX8-GISEL-NEXT:    v_add_f16_e32 v0, 1.0, v0
+; GFX8-GISEL-NEXT:    v_add_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_hi_op_v2f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -250,15 +609,14 @@ define void @undef_hi_op_v2f16(half %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi_op_v2f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_add_f16_e32 v0, 1.0, v0
-; GFX8-NEXT:    v_or_b32_e32 v0, 0x7e000000, v0
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: undef_hi_op_v2f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_pk_add_f16 v0, v0, 1.0 op_sel_hi:[1,0]
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use v0
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = insertelement <2 x half> poison, half %arg0, i32 0
   %op = fadd <2 x half> %undef.hi, <half 1.0, half 1.0>
   call void asm sideeffect "; use $0", "v"(<2 x half> %op);
@@ -266,24 +624,82 @@ define void @undef_hi_op_v2f16(half %arg0) {
 }
 
 define void @undef_hi_op_v2i16(i16 %arg0) {
-; GFX9-LABEL: undef_hi_op_v2i16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_movk_i32 s4, 0x63
-; GFX9-NEXT:    v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
-; GFX9-NEXT:    ;;#ASMSTART
-; GFX9-NEXT:    ; use v0
-; GFX9-NEXT:    ;;#ASMEND
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-SDAG-LABEL: undef_hi_op_v2i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    v_add_u16_e32 v0, 0x63, v0
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v0
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi_op_v2i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_add_u16_e32 v0, 0x63, v0
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v0
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX8-GISEL-LABEL: undef_hi_op_v2i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_and_b32 s4, 0xffff, s4
+; GFX8-GISEL-NEXT:    v_add_u16_e32 v0, 0x63, v0
+; GFX8-GISEL-NEXT:    s_lshl_b32 s4, s4, 16
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s4, v0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v0
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: undef_hi_op_v2i16:
+; GFX9-SDAG:       ; %bb.0:
+; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x63
+; GFX9-SDAG-NEXT:    v_pk_add_u16 v0, v0, s4 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT:    ;;#ASMSTART
+; GFX9-SDAG-NEXT:    ; use v0
+; GFX9-SDAG-NEXT:    ;;#ASMEND
+; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: undef_hi_op_v2i16:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x630063
+; GFX9-GISEL-NEXT:    v_pk_add_u16 v0, v0, v1
+; GFX9-GISEL-NEXT:    ;;#ASMSTART
+; GFX9-GISEL-NEXT:    ; use v0
+; GFX9-GISEL-NEXT:    ;;#ASMEND
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-SDAG-LABEL: undef_hi_op_v2i16:
+; GFX11-FAKE16-SDAG:       ; %bb.0:
+; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-SDAG-NEXT:    v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-FAKE16-SDAG-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-SDAG-NEXT:    ; use v0
+; GFX11-FAKE16-SDAG-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-GISEL-LABEL: undef_hi_op_v2i16:
+; GFX11-FAKE16-GISEL:       ; %bb.0:
+; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-GISEL-NEXT:    v_pk_add_u16 v0, 0x630063, v0
+; GFX11-FAKE16-GISEL-NEXT:    ;;#ASMSTART
+; GFX11-FAKE16-GISEL-NEXT:    ; use v0
+; GFX11-FAKE16-GISEL-NEXT:    ;;#ASMEND
+; GFX11-FAKE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-SDAG-LABEL: undef_hi_op_v2i16:
+; GFX11-TRUE16-SDAG:       ; %bb.0:
+; GFX11-TRUE16-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-SDAG-NEXT:    v_pk_add_u16 v0, 0x63, v0 op_sel_hi:[0,1]
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-SDAG-NEXT:    ; use v0
+; GFX11-TRUE16-SDAG-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-GISEL-LABEL: undef_hi_op_v2i16:
+; GFX11-TRUE16-GISEL:       ; %bb.0:
+; GFX11-TRUE16-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-GISEL-NEXT:    v_pk_add_u16 v0, 0x630063, v0
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMSTART
+; GFX11-TRUE16-GISEL-NEXT:    ; use v0
+; GFX11-TRUE16-GISEL-NEXT:    ;;#ASMEND
+; GFX11-TRUE16-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = insertelement <2 x i16> poison, i16 %arg0, i32 0
   %op = add <2 x i16> %undef.hi, <i16 99, i16 99>
   call void asm sideeffect "; use $0", "v"(<2 x i16> %op);
@@ -291,6 +707,24 @@ define void @undef_hi_op_v2i16(i16 %arg0) {
 }
 
 define void @undef_hi3_v4i16(i16 %arg0) {
+; GFX8-SDAG-LABEL: undef_hi3_v4i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_hi3_v4i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_hi3_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -299,19 +733,37 @@ define void @undef_hi3_v4i16(i16 %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi3_v4i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: undef_hi3_v4i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use v[0:1]
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = insertelement <4 x i16> poison, i16 %arg0, i32 0
   call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
   ret void
 }
 
 define void @undef_hi3_v4f16(half %arg0) {
+; GFX8-SDAG-LABEL: undef_hi3_v4f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_hi3_v4f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_hi3_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -320,19 +772,39 @@ define void @undef_hi3_v4f16(half %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi3_v4f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: undef_hi3_v4f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use v[0:1]
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = insertelement <4 x half> poison, half %arg0, i32 0
   call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
   ret void
 }
 
 define void @undef_hi2_v4i16(<2 x i16> %arg0) {
+; GFX8-SDAG-LABEL: undef_hi2_v4i16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_hi2_v4i16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_hi2_v4i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -341,19 +813,39 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi2_v4i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: undef_hi2_v4i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use v[0:1]
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
   ret void
 }
 
 define void @undef_hi2_v4f16(<2 x half> %arg0) {
+; GFX8-SDAG-LABEL: undef_hi2_v4f16:
+; GFX8-SDAG:       ; %bb.0:
+; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT:    ;;#ASMSTART
+; GFX8-SDAG-NEXT:    ; use v[0:1]
+; GFX8-SDAG-NEXT:    ;;#ASMEND
+; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: undef_hi2_v4f16:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-GISEL-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX8-GISEL-NEXT:    ;;#ASMSTART
+; GFX8-GISEL-NEXT:    ; use v[0:1]
+; GFX8-GISEL-NEXT:    ;;#ASMEND
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
 ; GFX9-LABEL: undef_hi2_v4f16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -362,15 +854,16 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
 ; GFX9-NEXT:    ;;#ASMEND
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX8-LABEL: undef_hi2_v4f16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    ;;#ASMSTART
-; GFX8-NEXT:    ; use v[0:1]
-; GFX8-NEXT:    ;;#ASMEND
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: undef_hi2_v4f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    ;;#ASMSTART
+; GFX11-NEXT:    ; use v[0:1]
+; GFX11-NEXT:    ;;#ASMEND
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %undef.hi = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
   ret void
 }
-
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX8: {{.*}}

diff  --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 75247500f1381..5fb50d0d89530 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -2608,9 +2608,7 @@ define <2 x half> @v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
 ; GFX11-TRUE16-LABEL: v_test_canonicalize_undef_reg_v2f16:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.l, v0.l, v0.l
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11-TRUE16-NEXT:    v_max_f16_e32 v0.h, v0.l, v0.l
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-FAKE16-LABEL: v_test_canonicalize_undef_reg_v2f16:

diff  --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
index 0adbecd952ae3..e59fbada6793d 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
@@ -173,8 +173,8 @@ define amdgpu_ps <3 x half> @test_fmaximum_v3f16_vv(<3 x half> %a, <3 x half> %b
 ;
 ; GFX12-GISEL-TRUE16-LABEL: test_fmaximum_v3f16_vv:
 ; GFX12-GISEL-TRUE16:       ; %bb.0:
-; GFX12-GISEL-TRUE16-NEXT:    v_maximum_f16 v1.l, v1.l, v3.l
 ; GFX12-GISEL-TRUE16-NEXT:    v_pk_maximum_f16 v0, v0, v2
+; GFX12-GISEL-TRUE16-NEXT:    v_maximum_f16 v1.l, v1.l, v3.l
 ; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-GISEL-FAKE16-LABEL: test_fmaximum_v3f16_vv:

diff  --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll
index e1d35b52defee..b25120f2ece6f 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll
@@ -173,8 +173,8 @@ define amdgpu_ps <3 x half> @test_fminimum_v3f16_vv(<3 x half> %a, <3 x half> %b
 ;
 ; GFX12-GISEL-TRUE16-LABEL: test_fminimum_v3f16_vv:
 ; GFX12-GISEL-TRUE16:       ; %bb.0:
-; GFX12-GISEL-TRUE16-NEXT:    v_minimum_f16 v1.l, v1.l, v3.l
 ; GFX12-GISEL-TRUE16-NEXT:    v_pk_minimum_f16 v0, v0, v2
+; GFX12-GISEL-TRUE16-NEXT:    v_minimum_f16 v1.l, v1.l, v3.l
 ; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-GISEL-FAKE16-LABEL: test_fminimum_v3f16_vv:

diff  --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
index 723e3ef15553a..1602e31d6147c 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
@@ -4326,9 +4326,8 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16hi_undef_hi(ptr addrspace(
 ;
 ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16hi_undef_hi:
 ; GFX12-GISEL-TRUE16:       ; %bb.0:
-; GFX12-GISEL-TRUE16-NEXT:    global_load_d16_b16 v0, v0, s[2:3]
+; GFX12-GISEL-TRUE16-NEXT:    global_load_d16_hi_b16 v0, v0, s[2:3]
 ; GFX12-GISEL-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16hi_undef_hi:
@@ -4366,9 +4365,8 @@ define amdgpu_ps <2 x half> @global_load_saddr_i16_d16hi_undef_hi_immneg128(ptr
 ;
 ; GFX12-GISEL-TRUE16-LABEL: global_load_saddr_i16_d16hi_undef_hi_immneg128:
 ; GFX12-GISEL-TRUE16:       ; %bb.0:
-; GFX12-GISEL-TRUE16-NEXT:    global_load_d16_b16 v0, v0, s[2:3] offset:-128
+; GFX12-GISEL-TRUE16-NEXT:    global_load_d16_hi_b16 v0, v0, s[2:3] offset:-128
 ; GFX12-GISEL-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX12-GISEL-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX12-GISEL-TRUE16-NEXT:    ; return to shader part epilog
 ;
 ; GFX12-GISEL-FAKE16-LABEL: global_load_saddr_i16_d16hi_undef_hi_immneg128:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index 0e66b0af99f34..22f562ab8557b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -784,13 +784,13 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v5, 0x7fff
 ; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v5
 ; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v5
 ; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v5
-; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v4.l
-; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v5
 ; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
 ; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v0.h, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v4.l
 ; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
@@ -910,9 +910,9 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
 ; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i16:
 ; GFX11-GISEL-TRUE16:       ; %bb.0:
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v3.l
 ; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
 ; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v0.h, v2.h
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v3.l
 ; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i16:

diff  --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
index 88c619e6182ed..1ae3434db6da5 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -372,9 +372,8 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt(half %
 ; SDAG-GFX11-TRUE16:       ; %bb.0:
 ; SDAG-GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; SDAG-GFX11-TRUE16-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; SDAG-GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; SDAG-GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
-; SDAG-GFX11-TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; SDAG-GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.h, v0
 ; SDAG-GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SDAG-GFX11-FAKE16-LABEL: v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_precvt:

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index 141b86a24c1c4..4a6202ea82944 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -493,9 +493,9 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX11-GISEL-TRUE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:
 ; GFX11-GISEL-TRUE16:       ; %bb.0:
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v1.l, v3.l
 ; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v2.l
 ; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v0.h, v0.h, v2.h
+; GFX11-GISEL-TRUE16-NEXT:    v_sub_f16_e32 v1.l, v1.l, v3.l
 ; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-GISEL-FAKE16-LABEL: v_constained_fsub_v3f16_fpexcept_strict:

diff  --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
index 84fe4ec677ccc..98d0a6281054a 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
@@ -299,13 +299,13 @@ define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a,
 ; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v0, 0x7fff
 ; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v6, v0
-; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v0
-; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
-; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v3.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v4, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v4, 0xffff8000, v5, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v5, 0xffff8000, v6, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v1.l
 ; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v2.l, v4.l
-; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v2.h, v5.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v2.h, v4.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v3.l, v5.l
 ; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:


        


More information about the llvm-commits mailing list