[llvm] afd42fb - [AMDGPU][True16][CodeGen] Support AND/OR/XOR and LDEXP True16 format (#102620)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 13 09:23:42 PDT 2024


Author: Brox Chen
Date: 2024-08-13T12:23:39-04:00
New Revision: afd42fb3038904e8c09c0fb735e713bc052ec0e4

URL: https://github.com/llvm/llvm-project/commit/afd42fb3038904e8c09c0fb735e713bc052ec0e4
DIFF: https://github.com/llvm/llvm-project/commit/afd42fb3038904e8c09c0fb735e713bc052ec0e4.diff

LOG: [AMDGPU][True16][CodeGen] Support AND/OR/XOR and LDEXP True16 format (#102620)

Support AND/OR/XOR true16 and LDEXP true/fake16 format.

These instructions are previously implemented with fake16 profile.
Fixing the implementation.

Added a RA hint so that when using 16bit register in a 32bit
instruction, try to use the register directly without an extra 16bit
move

---------

Co-authored-by: guochen2 <guochen2 at amd.com>

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
    llvm/lib/Target/AMDGPU/VOP1Instructions.td
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/lib/Target/AMDGPU/VOP3Instructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
    llvm/test/CodeGen/AMDGPU/bf16.ll
    llvm/test/CodeGen/AMDGPU/fadd.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
    llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f35bb204cbbdb7..05ed1b322c0d1b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -161,18 +161,34 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
 
         // TODO: Skip masking high bits if def is known boolean.
 
-        bool IsSGPR = TRI.isSGPRClass(SrcRC);
-        unsigned AndOpc =
-            IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
-        auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
-            .addImm(1)
-            .addReg(SrcReg);
-        if (IsSGPR)
-          And.setOperandDead(3); // Dead scc
-
-        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
-            .addImm(0)
-            .addReg(MaskedReg);
+        if (AMDGPU::getRegBitWidth(SrcRC->getID()) == 16) {
+          assert(Subtarget->useRealTrue16Insts());
+          const int64_t NoMods = 0;
+          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
+              .addImm(NoMods)
+              .addImm(1)
+              .addImm(NoMods)
+              .addReg(SrcReg)
+              .addImm(NoMods);
+          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
+              .addImm(NoMods)
+              .addImm(0)
+              .addImm(NoMods)
+              .addReg(MaskedReg)
+              .addImm(NoMods);
+        } else {
+          bool IsSGPR = TRI.isSGPRClass(SrcRC);
+          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
+          auto And = BuildMI(*BB, &I, DL, TII.get(AndOpc), MaskedReg)
+                         .addImm(1)
+                         .addReg(SrcReg);
+          if (IsSGPR)
+            And.setOperandDead(3); // Dead scc
+
+          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
+              .addImm(0)
+              .addReg(MaskedReg);
+        }
       }
 
       if (!MRI->getRegClassOrNull(SrcReg))
@@ -2206,6 +2222,16 @@ bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
     return false;
   }
 
+  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
+    assert(STI.useRealTrue16Insts());
+    const DebugLoc &DL = I.getDebugLoc();
+    MachineBasicBlock *MBB = I.getParent();
+    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), DstReg)
+        .addReg(SrcReg, 0, AMDGPU::lo16);
+    I.eraseFromParent();
+    return true;
+  }
+
   if (DstTy == LLT::fixed_vector(2, 16) && SrcTy == LLT::fixed_vector(2, 32)) {
     MachineBasicBlock *MBB = I.getParent();
     const DebugLoc &DL = I.getDebugLoc();

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index c41850ab55f75c..5a139d1cf8d825 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2030,6 +2030,8 @@ def : GCNPat <
 >;
 
 foreach fp16vt = [f16, bf16] in {
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let SubtargetPredicate = p in {
 def : GCNPat <
   (fabs (fp16vt VGPR_32:$src)),
   (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
@@ -2044,6 +2046,24 @@ def : GCNPat <
   (fneg (fabs (fp16vt VGPR_32:$src))),
   (V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
 >;
+}
+
+let SubtargetPredicate = UseRealTrue16Insts in {
+def : GCNPat <
+  (fabs (fp16vt VGPR_16:$src)),
+  (V_AND_B16_t16_e64 (i32 0), (i16 0x7fff), (i32 0), VGPR_16:$src)
+>;
+
+def : GCNPat <
+  (fneg (fp16vt VGPR_16:$src)),
+  (V_XOR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src)
+>;
+
+def : GCNPat <
+  (fneg (fabs (fp16vt VGPR_16:$src))),
+  (V_OR_B16_t16_e64 (i32 0), (i16 0x8000), (i32 0), VGPR_16:$src) // Set sign bit
+>;
+} // End SubtargetPredicate = UseRealTrue16Insts
 } // End foreach fp16vt = ...
 
 def : GCNPat <

diff  --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 5d38cafd73dd95..6e945bf368f4eb 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -152,6 +152,10 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
       if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
           !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
         return false;
+
+      if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
+          !AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
+        return false;
     }
   }
   return true;

diff  --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 34d12aa5e07835..03e4cb9fcf49b7 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1397,7 +1397,8 @@ def : GCNPat <
 
 } // End OtherPredicates = [isGFX8Plus]
 
-let OtherPredicates = [isGFX8Plus] in {
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let OtherPredicates = [isGFX8Plus, p] in {
 def : GCNPat<
   (i32 (anyext i16:$src)),
   (COPY $src)
@@ -1420,7 +1421,43 @@ def : GCNPat <
   (EXTRACT_SUBREG $src, sub0)
 >;
 
-} // End OtherPredicates = [isGFX8Plus]
+} // End OtherPredicates = [isGFX8Plus, p]
+
+let OtherPredicates = [UseFakeTrue16Insts] in {
+def : GCNPat<
+  (i32 (DivergentUnaryFrag<anyext> i16:$src)),
+  (COPY $src)
+>;
+} // End OtherPredicates = [UseFakeTrue16Insts]
+
+
+let OtherPredicates = [UseRealTrue16Insts] in {
+def : GCNPat<
+  (i32 (UniformUnaryFrag<anyext> (i16 SReg_32:$src))),
+  (COPY $src)
+>;
+
+def : GCNPat<
+  (i32 (DivergentUnaryFrag<anyext> i16:$src)),
+  (REG_SEQUENCE VGPR_32, $src, lo16, (i16 (IMPLICIT_DEF)), hi16)
+>;
+
+def : GCNPat<
+  (i64 (anyext i16:$src)),
+  (REG_SEQUENCE VReg_64, $src, lo16, (i16 (IMPLICIT_DEF)), hi16, (i32 (IMPLICIT_DEF)), sub1)
+>;
+
+def : GCNPat<
+  (i16 (trunc i32:$src)),
+  (EXTRACT_SUBREG $src, lo16)
+>;
+
+def : GCNPat <
+  (i16 (trunc i64:$src)),
+  (EXTRACT_SUBREG $src, lo16)
+>;
+
+} // End OtherPredicates = [UseRealTrue16Insts]
 
 //===----------------------------------------------------------------------===//
 // GFX9

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index d17b4f24081312..fccaa27f361381 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -922,18 +922,25 @@ def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
   let HasSrc1FloatMods = 0;
   let Src1ModSDWA = Int16SDWAInputMods;
 }
-def LDEXP_F16_VOPProfile_True16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
+def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
+  let Src1RC32 = RegisterOperand<VGPR_16_Lo128>;
+  let Src1DPP = RegisterOperand<VGPR_16_Lo128>;
+  let Src1ModDPP = IntT16VRegInputMods<0/*IsFake16*/>;
+}
+def LDEXP_F16_VOPProfile_Fake16 : VOPProfile_Fake16<VOP_F16_F16_F16> {
   let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
   let Src1DPP = RegisterOperand<VGPR_32_Lo128>;
-  let Src1ModDPP = IntT16VRegInputMods</* IsFake16= */ 1>;
+  let Src1ModDPP = IntT16VRegInputMods<1/*IsFake16*/>;
 }
 
 let isReMaterializable = 1 in {
 let FPDPRounding = 1 in {
   let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in
     defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
-  let SubtargetPredicate = HasTrue16BitInsts in
+  let SubtargetPredicate = UseRealTrue16Insts in
     defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
+  let SubtargetPredicate = UseFakeTrue16Insts in
+    defm V_LDEXP_F16_fake16 : VOP2Inst <"v_ldexp_f16_fake16", LDEXP_F16_VOPProfile_Fake16, null_frag, "v_ldexp_f16_fake16">;
 } // End FPDPRounding = 1
 // FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instuctions
 defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
@@ -968,14 +975,30 @@ class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.
 let OtherPredicates = [NotHasTrue16BitInsts] in
 def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
 
-let OtherPredicates = [HasTrue16BitInsts] in
-def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
+class LDEXP_F16_t16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
+  (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+               (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
+  (inst $src0_modifiers, $src0,
+        $src1_modifiers, $src1,
+        $clamp, /* clamp */
+        $omod, /* omod */
+        0) /* op_sel */
+>;
+
+let OtherPredicates = [UseRealTrue16Insts] in
+def : LDEXP_F16_t16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
+
+let OtherPredicates = [UseFakeTrue16Insts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_fake16_e64>;
 
 let SubtargetPredicate = isGFX11Plus in {
   let isCommutable = 1 in {
-    defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
-    defm V_OR_B16_t16  : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
-    defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
+    defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
+    defm V_AND_B16_fake16 : VOP2Inst_e64 <"v_and_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, and>;
+    defm V_OR_B16_t16  : VOP2Inst_e64 <"v_or_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, or>;
+    defm V_OR_B16_fake16  : VOP2Inst_e64 <"v_or_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, or>;
+    defm V_XOR_B16_t16 : VOP2Inst_e64 <"v_xor_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, xor>;
+    defm V_XOR_B16_fake16 : VOP2Inst_e64 <"v_xor_b16_fake16", VOPProfile_Fake16<VOP_I16_I16_I16>, xor>;
   } // End isCommutable = 1
 } // End SubtargetPredicate = isGFX11Plus
 
@@ -1714,6 +1737,7 @@ defm V_MUL_F16_t16         : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
 defm V_MUL_F16_fake16      : VOP2_Real_FULL_t16_gfx11_gfx12<0x035, "v_mul_f16">;
 defm V_FMAC_F16_t16        : VOP2_Real_FULL_t16_gfx11_gfx12<0x036, "v_fmac_f16">;
 defm V_LDEXP_F16_t16       : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
+defm V_LDEXP_F16_fake16    : VOP2_Real_FULL_t16_gfx11_gfx12<0x03b, "v_ldexp_f16">;
 defm V_MAX_F16_t16         : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
 defm V_MAX_F16_fake16      : VOP2_Real_FULL_t16_gfx11<0x039, "v_max_f16">;
 defm V_MIN_F16_t16         : VOP2_Real_FULL_t16_gfx11<0x03a, "v_min_f16">;

diff  --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index efa8e9c74d4495..6748eff9376b0d 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1227,8 +1227,11 @@ let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
   defm V_WRITELANE_B32     : VOP3_Real_No_Suffix_gfx11_gfx12<0x361>; // Pseudo in VOP2
 } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in)
 defm V_AND_B16_t16         : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
+defm V_AND_B16_fake16      : VOP3Only_Realtriple_t16_gfx11_gfx12<0x362, "v_and_b16">;
 defm V_OR_B16_t16          : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
+defm V_OR_B16_fake16       : VOP3Only_Realtriple_t16_gfx11_gfx12<0x363, "v_or_b16">;
 defm V_XOR_B16_t16         : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;
+defm V_XOR_B16_fake16      : VOP3Only_Realtriple_t16_gfx11_gfx12<0x364, "v_xor_b16">;
 
 //===----------------------------------------------------------------------===//
 // GFX10.

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
index 014534ab79fe64..0ff633fb4d8bec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fceil.s16.mir
@@ -49,10 +49,11 @@ body: |
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
+    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
     ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
-    ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
+    ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
+    ; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
     ;
     ; GFX11-FAKE16-LABEL: name: fceil_s16_vv
     ; GFX11-FAKE16: liveins: $vgpr0
@@ -89,8 +90,9 @@ body: |
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
-    ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
+    ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
+    ; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
     ;
     ; GFX11-FAKE16-LABEL: name: fceil_s16_vs
     ; GFX11-FAKE16: liveins: $sgpr0
@@ -126,10 +128,11 @@ body: |
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
+    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
     ; GFX11-NEXT: [[V_CEIL_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_CEIL_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_CEIL_F16_t16_e64_]]
-    ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
+    ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_CEIL_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
+    ; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
     ;
     ; GFX11-FAKE16-LABEL: name: fceil_fneg_s16_vv
     ; GFX11-FAKE16: liveins: $vgpr0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
index dcf9e169f586be..fc8a6aaa17512b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ffloor.s16.mir
@@ -58,10 +58,11 @@ body: |
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
+    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
     ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
-    ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
+    ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
+    ; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
     ;
     ; GFX11-FAKE16-LABEL: name: ffloor_s16_vv
     ; GFX11-FAKE16: liveins: $vgpr0
@@ -98,8 +99,9 @@ body: |
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
-    ; GFX11-NEXT: $vgpr0 = COPY [[COPY1]]
+    ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
+    ; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
     ;
     ; GFX11-FAKE16-LABEL: name: ffloor_s16_vs
     ; GFX11-FAKE16: liveins: $sgpr0
@@ -135,10 +137,11 @@ body: |
     ; GFX11: liveins: $vgpr0
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]]
+    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_16 = COPY [[COPY]].lo16
     ; GFX11-NEXT: [[V_FLOOR_F16_t16_e64_:%[0-9]+]]:vgpr_16 = nofpexcept V_FLOOR_F16_t16_e64 1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
-    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[V_FLOOR_F16_t16_e64_]]
-    ; GFX11-NEXT: $vgpr0 = COPY [[COPY2]]
+    ; GFX11-NEXT: [[DEF:%[0-9]+]]:vgpr_16 = IMPLICIT_DEF
+    ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vgpr_32 = REG_SEQUENCE [[V_FLOOR_F16_t16_e64_]], %subreg.lo16, [[DEF]], %subreg.hi16
+    ; GFX11-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
     ;
     ; GFX11-FAKE16-LABEL: name: ffloor_fneg_s16_vv
     ; GFX11-FAKE16: liveins: $vgpr0

diff  --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 970bb08e1838b2..d9ce1e4efe0e50 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -17180,11 +17180,17 @@ define bfloat @v_fabs_bf16(bfloat %a) {
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fabs_bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fabs_bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b16 v0.l, 0x7fff, v0.l
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fabs_bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = call bfloat @llvm.fabs.bf16(bfloat %a)
   ret bfloat %op
 }
@@ -17266,11 +17272,17 @@ define bfloat @v_fneg_bf16(bfloat %a) {
 ; GFX10-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fneg_bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fneg_bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_xor_b16 v0.l, 0x8000, v0.l
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fneg_bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = fneg bfloat %a
   ret bfloat %op
 }
@@ -17365,11 +17377,17 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
 ; GFX10-NEXT:    v_or_b32_e32 v0, 0x8000, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fneg_fabs_bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_or_b32_e32 v0, 0x8000, v0
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fneg_fabs_bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_or_b16 v0.l, 0x8000, v0.l
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fneg_fabs_bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_or_b32_e32 v0, 0x8000, v0
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %fabs = call bfloat @llvm.fabs.bf16(bfloat %a)
   %op = fneg bfloat %fabs
   ret bfloat %op
@@ -25005,40 +25023,22 @@ define { bfloat, i16 } @v_frexp_bf16_i16(bfloat %a) {
 ; GFX10-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11TRUE16-LABEL: v_frexp_bf16_i16:
-; GFX11TRUE16:       ; %bb.0:
-; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
-; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT:    v_frexp_mant_f32_e32 v1, v0
-; GFX11TRUE16-NEXT:    v_bfe_u32 v2, v1, 16, 1
-; GFX11TRUE16-NEXT:    v_or_b32_e32 v3, 0x400000, v1
-; GFX11TRUE16-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT:    v_add3_u32 v2, v2, v1, 0x7fff
-; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc_lo
-; GFX11TRUE16-NEXT:    v_frexp_exp_i32_f32_e32 v2, v0
-; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX11TRUE16-NEXT:    v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
-; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11FAKE16-LABEL: v_frexp_bf16_i16:
-; GFX11FAKE16:       ; %bb.0:
-; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11FAKE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
-; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11FAKE16-NEXT:    v_frexp_mant_f32_e32 v0, v1
-; GFX11FAKE16-NEXT:    v_bfe_u32 v2, v0, 16, 1
-; GFX11FAKE16-NEXT:    v_or_b32_e32 v3, 0x400000, v0
-; GFX11FAKE16-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
-; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11FAKE16-NEXT:    v_add3_u32 v2, v2, v0, 0x7fff
-; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11FAKE16-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
-; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-LABEL: v_frexp_bf16_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_frexp_mant_f32_e32 v0, v1
+; GFX11-NEXT:    v_bfe_u32 v2, v0, 16, 1
+; GFX11-NEXT:    v_or_b32_e32 v3, 0x400000, v0
+; GFX11-NEXT:    v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_add3_u32 v2, v2, v0, 0x7fff
+; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %op = call { bfloat, i16 } @llvm.frexp.bf16.i16(bfloat %a)
   ret { bfloat, i16 } %op
 }
@@ -28662,17 +28662,29 @@ define <2 x i16> @v_fptosi_v2bf16_to_v2i16(<2 x bfloat> %x) {
 ; GFX10-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fptosi_v2bf16_to_v2i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v1, 0xffff0000, v0
+; GFX11TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = fptosi <2 x bfloat> %x to <2 x i16>
   ret <2 x i16> %op
 }
@@ -28753,19 +28765,33 @@ define <3 x i16> @v_fptosi_v3bf16_to_v3i16(<3 x bfloat> %x) {
 ; GFX10-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fptosi_v3bf16_to_v3i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX11-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff0000, v0
+; GFX11TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11TRUE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX11FAKE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = fptosi <3 x bfloat> %x to <3 x i16>
   ret <3 x i16> %op
 }
@@ -28867,23 +28893,41 @@ define <4 x i16> @v_fptosi_v4bf16_to_v4i16(<4 x bfloat> %x) {
 ; GFX10-NEXT:    v_perm_b32 v1, v1, v2, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_fptosi_v4bf16_to_v4i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
-; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
-; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v3, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v1, v1, v2, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff0000, v0
+; GFX11TRUE16-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
+; GFX11TRUE16-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v3, v3
+; GFX11TRUE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX11FAKE16-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff0000, v0
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v3, v3
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11FAKE16-NEXT:    v_cvt_i32_f32_e32 v1, v1
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v0, v3, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v1, v1, v2, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = fptosi <4 x bfloat> %x to <4 x i16>
   ret <4 x i16> %op
 }
@@ -34518,15 +34562,25 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_select_fneg_lhs_bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    v_xor_b16 v1.l, 0x8000, v1.l
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %neg.a = fneg bfloat %a
   %op = select i1 %cond, bfloat %neg.a, bfloat %b
   ret bfloat %op
@@ -34582,15 +34636,25 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_select_fneg_rhs_bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    v_xor_b16 v2.l, 0x8000, v2.l
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %neg.b = fneg bfloat %b
   %op = select i1 %cond, bfloat %a, bfloat %neg.b
   ret bfloat %op
@@ -34673,18 +34737,31 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
 ; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_select_v2bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_dual_cndmask_b32 v0, v2, v1 :: v_dual_cndmask_b32 v1, v4, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_select_v2bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_dual_cndmask_b32 v0, v2, v1 :: v_dual_cndmask_b32 v1, v3, v4
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_select_v2bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v0, v2, v1 :: v_dual_cndmask_b32 v1, v4, v3
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
   ret <2 x bfloat> %op
 }
@@ -34768,20 +34845,35 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
 ; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v2bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_vselect_v2bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_vselect_v2bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, v5, v4, vcc_lo
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
   ret <2 x bfloat> %op
 }
@@ -34936,20 +35028,36 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
 ; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: s_select_v2bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
-; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, s3, v1, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, s1, v2, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11TRUE16-LABEL: s_select_v2bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11TRUE16-NEXT:    s_lshr_b32 s3, s0, 16
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v1.l, s2
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v2.l, s3
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v3.l, s1
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v4.l, s0
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_dual_cndmask_b32 v0, v1, v2 :: v_dual_cndmask_b32 v1, v3, v4
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11FAKE16-LABEL: s_select_v2bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11FAKE16-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
+; GFX11FAKE16-NEXT:    s_lshr_b32 s3, s1, 16
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, s3, v1, vcc_lo
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, s1, v2, vcc_lo
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v0, v1, 0x5040100
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11FAKE16-NEXT:    ; return to shader part epilog
   %cond = icmp eq i32 %c, 0
   %op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
   %cast = bitcast <2 x bfloat> %op to i32
@@ -35038,21 +35146,39 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
 ; GFX10-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: s_vselect_v2bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
-; GFX11-NEXT:    s_lshr_b32 s0, s1, 16
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, s0, v2, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, s1, v3, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11TRUE16-LABEL: s_vselect_v2bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX11TRUE16-NEXT:    s_lshr_b32 s3, s0, 16
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v2.l, s2
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v3.l, s3
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v4.l, s1
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v5.l, s0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v2, v3, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11FAKE16-LABEL: s_vselect_v2bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_lshr_b32 s2, s0, 16
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11FAKE16-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
+; GFX11FAKE16-NEXT:    s_lshr_b32 s0, s1, 16
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, s0, v2, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, s1, v3, vcc_lo
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11FAKE16-NEXT:    ; return to shader part epilog
   %cond = icmp eq <2 x i32> %c, zeroinitializer
   %op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
   %cast = bitcast <2 x bfloat> %op to i32
@@ -36740,33 +36866,64 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
 ; GFX10-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
-; GFX11-LABEL: s_vselect_v4bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_lshr_b32 s4, s1, 16
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX11-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
-; GFX11-NEXT:    s_lshr_b32 s4, s3, 16
-; GFX11-NEXT:    s_lshr_b32 s5, s0, 16
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, s4, v4, vcc_lo
-; GFX11-NEXT:    v_mov_b32_e32 v4, s5
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NEXT:    v_mov_b32_e32 v6, s0
-; GFX11-NEXT:    s_lshr_b32 s0, s2, 16
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instid1(SALU_CYCLE_1)
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, s0, v4, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, s2, v6, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, s3, v5, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
-; GFX11-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s1, v1
-; GFX11-NEXT:    ; return to shader part epilog
+; GFX11TRUE16-LABEL: s_vselect_v4bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_lshr_b32 s4, s3, 16
+; GFX11TRUE16-NEXT:    s_lshr_b32 s5, s1, 16
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v4.l, s4
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v5.l, s5
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX11TRUE16-NEXT:    s_lshr_b32 s4, s0, 16
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v7.l, s2
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v8.l, s0
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v6.l, s4
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc_lo
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v4.l, s3
+; GFX11TRUE16-NEXT:    s_lshr_b32 s3, s2, 16
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v5.l, s3
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v9.l, s1
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3)
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v7, v8, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v2, v4, v9, vcc_lo
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11TRUE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX11TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX11FAKE16-LABEL: s_vselect_v4bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_lshr_b32 s4, s1, 16
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX11FAKE16-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
+; GFX11FAKE16-NEXT:    s_lshr_b32 s4, s3, 16
+; GFX11FAKE16-NEXT:    s_lshr_b32 s5, s0, 16
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v3, s4, v4, vcc_lo
+; GFX11FAKE16-NEXT:    v_mov_b32_e32 v4, s5
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11FAKE16-NEXT:    v_mov_b32_e32 v6, s0
+; GFX11FAKE16-NEXT:    s_lshr_b32 s0, s2, 16
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instid1(SALU_CYCLE_1)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, s0, v4, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, s2, v6, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v2
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v2, s3, v5, vcc_lo
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX11FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX11FAKE16-NEXT:    ; return to shader part epilog
   %cond = icmp eq <4 x i32> %c, zeroinitializer
   %op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
   %cast = bitcast <4 x bfloat> %op to <2 x i32>
@@ -36910,29 +37067,57 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
 ; GFX10-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v4bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
-; GFX11-NEXT:    v_and_b32_e32 v2, 1, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX11-NEXT:    v_dual_cndmask_b32 v2, v7, v5 :: v_dual_and_b32 v3, 1, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_dual_cndmask_b32 v0, v6, v4 :: v_dual_and_b32 v1, 1, v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc_lo
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_vselect_v4bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v8.l, v7.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v9.l, v5.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v2, v8, v9, vcc_lo
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v4
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v6, v4, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v8, v9, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc_lo
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_vselect_v4bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v6
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v2, v7, v5 :: v_dual_and_b32 v3, 1, v3
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v0, v6, v4 :: v_dual_and_b32 v1, 1, v1
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, v9, v8, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v3, v7, v5, vcc_lo
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
   ret <4 x bfloat> %op
 }
@@ -37172,47 +37357,97 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
 ; GFX10-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v8bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v16, 16, v10
-; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
-; GFX11-NEXT:    v_and_b32_e32 v6, 1, v6
-; GFX11-NEXT:    v_and_b32_e32 v5, 1, v5
-; GFX11-NEXT:    v_and_b32_e32 v2, 1, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX11-NEXT:    v_dual_cndmask_b32 v6, v15, v11 :: v_dual_and_b32 v1, 1, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
-; GFX11-NEXT:    v_and_b32_e32 v4, 1, v4
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX11-NEXT:    v_dual_cndmask_b32 v4, v14, v10 :: v_dual_and_b32 v3, 1, v3
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
-; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v12
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, v17, v16, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX11-NEXT:    v_dual_cndmask_b32 v2, v13, v9 :: v_dual_and_b32 v7, 1, v7
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v12, v8, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v14, v10, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v13, v9, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX11-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, v15, v11, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_vselect_v8bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v16.l, v15.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v17.l, v11.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v6, v16, v17, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v14
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v10
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v4, v14, v10, vcc_lo
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v10.l, v13.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v14.l, v9.l
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v5, 1, v5
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v5, v16, v17, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v2, v10, v14, vcc_lo
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v7, 1, v7
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v12
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v8
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v12, v8, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v10, v14, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v3, v13, v9, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v7, v15, v11, vcc_lo
+; GFX11TRUE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_vselect_v8bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v10
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v14
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v5, 1, v5
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v6, v15, v11 :: v_dual_and_b32 v1, 1, v1
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v11, 16, v11
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v15, 16, v15
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v4, v14, v10 :: v_dual_and_b32 v3, 1, v3
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v14, 16, v12
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v5, v17, v16, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v2, v13, v9 :: v_dual_and_b32 v7, 1, v7
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v9, 16, v9
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v13, 16, v13
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v12, v8, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, v14, v10, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v3, v13, v9, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX11FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v7, v15, v11, vcc_lo
+; GFX11FAKE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = select <8 x i1> %cond, <8 x bfloat> %a, <8 x bfloat> %b
   ret <8 x bfloat> %op
 }
@@ -37777,85 +38012,186 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
 ; GFX10-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v16bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    scratch_load_b32 v31, off, s32
-; GFX11-NEXT:    v_lshrrev_b32_e32 v39, 16, v19
-; GFX11-NEXT:    v_lshrrev_b32_e32 v48, 16, v27
-; GFX11-NEXT:    v_and_b32_e32 v12, 1, v12
-; GFX11-NEXT:    v_and_b32_e32 v13, 1, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v33, 16, v22
-; GFX11-NEXT:    v_lshrrev_b32_e32 v34, 16, v30
-; GFX11-NEXT:    v_lshrrev_b32_e32 v49, 16, v18
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v50, 16, v26
-; GFX11-NEXT:    v_and_b32_e32 v10, 1, v10
-; GFX11-NEXT:    v_dual_cndmask_b32 v12, v30, v22 :: v_dual_and_b32 v11, 1, v11
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    v_lshrrev_b32_e32 v35, 16, v21
-; GFX11-NEXT:    v_lshrrev_b32_e32 v36, 16, v29
-; GFX11-NEXT:    v_lshrrev_b32_e32 v53, 16, v16
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, v34, v33, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX11-NEXT:    v_and_b32_e32 v3, 1, v3
-; GFX11-NEXT:    v_lshrrev_b32_e32 v54, 16, v24
-; GFX11-NEXT:    v_and_b32_e32 v8, 1, v8
-; GFX11-NEXT:    v_and_b32_e32 v9, 1, v9
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, v29, v21, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX11-NEXT:    v_and_b32_e32 v2, 1, v2
-; GFX11-NEXT:    v_lshrrev_b32_e32 v37, 16, v20
-; GFX11-NEXT:    v_lshrrev_b32_e32 v38, 16, v28
-; GFX11-NEXT:    v_lshrrev_b32_e32 v51, 16, v17
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, v36, v35, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX11-NEXT:    v_and_b32_e32 v5, 1, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v52, 16, v25
-; GFX11-NEXT:    v_and_b32_e32 v6, 1, v6
-; GFX11-NEXT:    v_lshrrev_b32_e32 v32, 16, v23
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, v28, v20, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX11-NEXT:    v_and_b32_e32 v4, 1, v4
-; GFX11-NEXT:    v_and_b32_e32 v15, 1, v15
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, v38, v37, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, v27, v19, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX11-NEXT:    v_dual_cndmask_b32 v4, v26, v18 :: v_dual_and_b32 v7, 1, v7
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v25, v17, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v52, v51, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v24, v16, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v54, v53, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, v50, v49, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX11-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, v48, v39, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v31
-; GFX11-NEXT:    v_and_b32_e32 v14, 1, v14
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX11-NEXT:    v_cndmask_b32_e32 v14, v31, v23, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, v3, v32, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX11-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_vselect_v16bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v49.l, v26.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v50.l, v18.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v18, 16, v18
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v26, 16, v26
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v12, 1, v12
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v33.l, v30.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v34.l, v22.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v53.l, v24.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v54.l, v16.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v16
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v24, 16, v24
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v10, 1, v10
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v35.l, v29.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v36.l, v21.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v51.l, v25.l
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v12, v33, v34, vcc_lo
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v52.l, v17.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v17, 16, v17
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v25, 16, v25
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v8, 1, v8
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v37.l, v28.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v38.l, v20.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v20, 16, v20
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v10, v35, v36, vcc_lo
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v28, 16, v28
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v5, 1, v5
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v39.l, v27.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v48.l, v19.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v19, 16, v19
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v8, v37, v38, vcc_lo
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v27, 16, v27
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v7, 1, v7
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v21, 16, v21
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v29, 16, v29
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v6, v39, v48, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v9, 1, v9
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v22, 16, v22
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v30, 16, v30
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v4, v49, v50, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v11, 1, v11
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v32.l, v23.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v23, 16, v23
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v2, v51, v52, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v13, 1, v13
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v53, v54, vcc_lo
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v13, v30, v22, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v11, v29, v21, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v9, v28, v20, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v7, v27, v19, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v24, v16, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v3, v25, v17, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX11TRUE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v5, v26, v18, vcc_lo
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v16, 16, v31
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v17.l, v31.l
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v3.l, v16.l
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v14, 1, v14
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11TRUE16-NEXT:    v_dual_cndmask_b32 v14, v17, v32 :: v_dual_and_b32 v15, 1, v15
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v15, v3, v23, vcc_lo
+; GFX11TRUE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11TRUE16-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_vselect_v16bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    scratch_load_b32 v31, off, s32
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v39, 16, v19
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v48, 16, v27
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v12, 1, v12
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v13, 1, v13
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v22
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v30
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v49, 16, v18
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v50, 16, v26
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v10, 1, v10
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v12, v30, v22 :: v_dual_and_b32 v11, 1, v11
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v35, 16, v21
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v36, 16, v29
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v53, 16, v16
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v13, v34, v33, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v54, 16, v24
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v8, 1, v8
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v9, 1, v9
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v10, v29, v21, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v37, 16, v20
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v38, 16, v28
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v51, 16, v17
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v11, v36, v35, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v5, 1, v5
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v52, 16, v25
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v23
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v8, v28, v20, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v15, 1, v15
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v9, v38, v37, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v6, v27, v19, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v4, v26, v18 :: v_dual_and_b32 v7, 1, v7
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v2, v25, v17, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v3, v52, v51, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v24, v16, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, v54, v53, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v5, v50, v49, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX11FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v7, v48, v39, vcc_lo
+; GFX11FAKE16-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v31
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v14, 1, v14
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v14, v31, v23, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v15, v3, v32, vcc_lo
+; GFX11FAKE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX11FAKE16-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = select <16 x i1> %cond, <16 x bfloat> %a, <16 x bfloat> %b
   ret <16 x bfloat> %op
 }
@@ -39279,205 +39615,455 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: v_vselect_v32bf16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_clause 0x1f
-; GFX11-NEXT:    scratch_load_u16 v31, off, s32
-; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:128
-; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:64
-; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:124
-; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:60
-; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:120
-; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:56
-; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:116
-; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:52
-; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:112
-; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:48
-; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:108
-; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:44
-; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:104
-; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:40
-; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:100
-; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:36
-; GFX11-NEXT:    scratch_load_b32 v64, off, s32 offset:96
-; GFX11-NEXT:    scratch_load_b32 v65, off, s32 offset:32
-; GFX11-NEXT:    scratch_load_b32 v66, off, s32 offset:92
-; GFX11-NEXT:    scratch_load_b32 v67, off, s32 offset:28
-; GFX11-NEXT:    scratch_load_b32 v68, off, s32 offset:88
-; GFX11-NEXT:    scratch_load_b32 v69, off, s32 offset:24
-; GFX11-NEXT:    scratch_load_b32 v70, off, s32 offset:84
-; GFX11-NEXT:    scratch_load_b32 v71, off, s32 offset:20
-; GFX11-NEXT:    scratch_load_b32 v80, off, s32 offset:80
-; GFX11-NEXT:    scratch_load_b32 v81, off, s32 offset:16
-; GFX11-NEXT:    scratch_load_b32 v82, off, s32 offset:76
-; GFX11-NEXT:    scratch_load_b32 v83, off, s32 offset:12
-; GFX11-NEXT:    scratch_load_b32 v84, off, s32 offset:72
-; GFX11-NEXT:    scratch_load_b32 v85, off, s32 offset:8
-; GFX11-NEXT:    scratch_load_b32 v86, off, s32 offset:68
-; GFX11-NEXT:    scratch_load_b32 v87, off, s32 offset:4
-; GFX11-NEXT:    v_and_b32_e32 v30, 1, v30
-; GFX11-NEXT:    v_and_b32_e32 v28, 1, v28
-; GFX11-NEXT:    v_and_b32_e32 v26, 1, v26
-; GFX11-NEXT:    v_and_b32_e32 v24, 1, v24
-; GFX11-NEXT:    v_and_b32_e32 v22, 1, v22
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v30
-; GFX11-NEXT:    v_and_b32_e32 v3, 1, v3
-; GFX11-NEXT:    v_and_b32_e32 v20, 1, v20
-; GFX11-NEXT:    v_and_b32_e32 v18, 1, v18
-; GFX11-NEXT:    v_and_b32_e32 v16, 1, v16
-; GFX11-NEXT:    s_waitcnt vmcnt(30)
-; GFX11-NEXT:    v_cndmask_b32_e32 v30, v32, v33, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v28
-; GFX11-NEXT:    v_and_b32_e32 v1, 1, v1
-; GFX11-NEXT:    v_lshrrev_b32_e32 v33, 16, v33
-; GFX11-NEXT:    v_lshrrev_b32_e32 v32, 16, v32
-; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
-; GFX11-NEXT:    s_waitcnt vmcnt(28)
-; GFX11-NEXT:    v_cndmask_b32_e32 v28, v34, v35, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v26
-; GFX11-NEXT:    v_and_b32_e32 v7, 1, v7
-; GFX11-NEXT:    v_lshrrev_b32_e32 v35, 16, v35
-; GFX11-NEXT:    v_lshrrev_b32_e32 v34, 16, v34
-; GFX11-NEXT:    v_and_b32_e32 v2, 1, v2
-; GFX11-NEXT:    s_waitcnt vmcnt(26)
-; GFX11-NEXT:    v_cndmask_b32_e32 v26, v36, v37, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v24
-; GFX11-NEXT:    v_and_b32_e32 v5, 1, v5
-; GFX11-NEXT:    v_lshrrev_b32_e32 v37, 16, v37
-; GFX11-NEXT:    v_lshrrev_b32_e32 v36, 16, v36
-; GFX11-NEXT:    v_and_b32_e32 v4, 1, v4
-; GFX11-NEXT:    s_waitcnt vmcnt(24)
-; GFX11-NEXT:    v_cndmask_b32_e32 v24, v38, v39, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v22
-; GFX11-NEXT:    v_and_b32_e32 v11, 1, v11
-; GFX11-NEXT:    v_lshrrev_b32_e32 v39, 16, v39
-; GFX11-NEXT:    v_lshrrev_b32_e32 v38, 16, v38
-; GFX11-NEXT:    v_and_b32_e32 v6, 1, v6
-; GFX11-NEXT:    s_waitcnt vmcnt(22)
-; GFX11-NEXT:    v_cndmask_b32_e32 v22, v48, v49, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v20
-; GFX11-NEXT:    v_and_b32_e32 v9, 1, v9
-; GFX11-NEXT:    v_lshrrev_b32_e32 v49, 16, v49
-; GFX11-NEXT:    v_lshrrev_b32_e32 v48, 16, v48
-; GFX11-NEXT:    v_and_b32_e32 v8, 1, v8
-; GFX11-NEXT:    s_waitcnt vmcnt(20)
-; GFX11-NEXT:    v_cndmask_b32_e32 v20, v50, v51, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v18
-; GFX11-NEXT:    v_and_b32_e32 v15, 1, v15
-; GFX11-NEXT:    v_lshrrev_b32_e32 v51, 16, v51
-; GFX11-NEXT:    v_lshrrev_b32_e32 v50, 16, v50
-; GFX11-NEXT:    v_and_b32_e32 v10, 1, v10
-; GFX11-NEXT:    s_waitcnt vmcnt(18)
-; GFX11-NEXT:    v_cndmask_b32_e32 v18, v52, v53, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
-; GFX11-NEXT:    v_and_b32_e32 v13, 1, v13
-; GFX11-NEXT:    v_lshrrev_b32_e32 v53, 16, v53
-; GFX11-NEXT:    v_lshrrev_b32_e32 v52, 16, v52
-; GFX11-NEXT:    v_and_b32_e32 v12, 1, v12
-; GFX11-NEXT:    s_waitcnt vmcnt(16)
-; GFX11-NEXT:    v_cndmask_b32_e32 v16, v54, v55, vcc_lo
-; GFX11-NEXT:    v_lshrrev_b32_e32 v55, 16, v55
-; GFX11-NEXT:    v_lshrrev_b32_e32 v54, 16, v54
-; GFX11-NEXT:    v_and_b32_e32 v14, 1, v14
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX11-NEXT:    s_waitcnt vmcnt(14)
-; GFX11-NEXT:    v_dual_cndmask_b32 v14, v64, v65 :: v_dual_and_b32 v19, 1, v19
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX11-NEXT:    v_and_b32_e32 v17, 1, v17
-; GFX11-NEXT:    v_lshrrev_b32_e32 v65, 16, v65
-; GFX11-NEXT:    v_lshrrev_b32_e32 v64, 16, v64
-; GFX11-NEXT:    s_waitcnt vmcnt(12)
-; GFX11-NEXT:    v_cndmask_b32_e32 v12, v66, v67, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX11-NEXT:    v_and_b32_e32 v23, 1, v23
-; GFX11-NEXT:    v_lshrrev_b32_e32 v67, 16, v67
-; GFX11-NEXT:    v_lshrrev_b32_e32 v66, 16, v66
-; GFX11-NEXT:    s_waitcnt vmcnt(10)
-; GFX11-NEXT:    v_cndmask_b32_e32 v10, v68, v69, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX11-NEXT:    v_and_b32_e32 v21, 1, v21
-; GFX11-NEXT:    v_lshrrev_b32_e32 v69, 16, v69
-; GFX11-NEXT:    v_lshrrev_b32_e32 v68, 16, v68
-; GFX11-NEXT:    s_waitcnt vmcnt(8)
-; GFX11-NEXT:    v_cndmask_b32_e32 v8, v70, v71, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX11-NEXT:    v_and_b32_e32 v27, 1, v27
-; GFX11-NEXT:    v_lshrrev_b32_e32 v71, 16, v71
-; GFX11-NEXT:    v_lshrrev_b32_e32 v70, 16, v70
-; GFX11-NEXT:    s_waitcnt vmcnt(6)
-; GFX11-NEXT:    v_cndmask_b32_e32 v6, v80, v81, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX11-NEXT:    v_and_b32_e32 v25, 1, v25
-; GFX11-NEXT:    v_lshrrev_b32_e32 v81, 16, v81
-; GFX11-NEXT:    v_lshrrev_b32_e32 v80, 16, v80
-; GFX11-NEXT:    s_waitcnt vmcnt(4)
-; GFX11-NEXT:    v_cndmask_b32_e32 v4, v82, v83, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX11-NEXT:    v_and_b32_e32 v31, 1, v31
-; GFX11-NEXT:    v_lshrrev_b32_e32 v83, 16, v83
-; GFX11-NEXT:    v_lshrrev_b32_e32 v82, 16, v82
-; GFX11-NEXT:    s_waitcnt vmcnt(2)
-; GFX11-NEXT:    v_cndmask_b32_e32 v2, v84, v85, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX11-NEXT:    v_and_b32_e32 v29, 1, v29
-; GFX11-NEXT:    v_lshrrev_b32_e32 v85, 16, v85
-; GFX11-NEXT:    v_lshrrev_b32_e32 v84, 16, v84
-; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_cndmask_b32_e32 v0, v86, v87, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v31
-; GFX11-NEXT:    v_lshrrev_b32_e32 v87, 16, v87
-; GFX11-NEXT:    v_lshrrev_b32_e32 v86, 16, v86
-; GFX11-NEXT:    v_cndmask_b32_e32 v31, v32, v33, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v29
-; GFX11-NEXT:    v_cndmask_b32_e32 v29, v34, v35, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v27
-; GFX11-NEXT:    v_cndmask_b32_e32 v27, v36, v37, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v25
-; GFX11-NEXT:    v_cndmask_b32_e32 v25, v38, v39, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v23
-; GFX11-NEXT:    v_cndmask_b32_e32 v23, v48, v49, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v21
-; GFX11-NEXT:    v_cndmask_b32_e32 v21, v50, v51, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v19
-; GFX11-NEXT:    v_cndmask_b32_e32 v19, v52, v53, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v17
-; GFX11-NEXT:    v_cndmask_b32_e32 v17, v54, v55, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX11-NEXT:    v_cndmask_b32_e32 v15, v64, v65, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX11-NEXT:    v_cndmask_b32_e32 v13, v66, v67, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX11-NEXT:    v_cndmask_b32_e32 v11, v68, v69, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX11-NEXT:    v_cndmask_b32_e32 v7, v80, v81, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX11-NEXT:    v_cndmask_b32_e32 v3, v84, v85, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX11-NEXT:    v_cndmask_b32_e32 v1, v86, v87, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v5, v82, v83, vcc_lo
-; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX11-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
-; GFX11-NEXT:    v_cndmask_b32_e32 v9, v70, v71, vcc_lo
-; GFX11-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v10, v21, v20, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v11, v23, v22, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v8, v17, v16, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v9, v19, v18, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v12, v25, v24, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v13, v27, v26, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v14, v29, v28, 0x5040100
-; GFX11-NEXT:    v_perm_b32 v15, v31, v30, 0x5040100
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11TRUE16-LABEL: v_vselect_v32bf16:
+; GFX11TRUE16:       ; %bb.0:
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11TRUE16-NEXT:    s_clause 0x1f
+; GFX11TRUE16-NEXT:    scratch_load_u16 v31, off, s32
+; GFX11TRUE16-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; GFX11TRUE16-NEXT:    scratch_load_b32 v33, off, s32 offset:64
+; GFX11TRUE16-NEXT:    scratch_load_b32 v34, off, s32 offset:124
+; GFX11TRUE16-NEXT:    scratch_load_b32 v35, off, s32 offset:60
+; GFX11TRUE16-NEXT:    scratch_load_b32 v36, off, s32 offset:120
+; GFX11TRUE16-NEXT:    scratch_load_b32 v37, off, s32 offset:56
+; GFX11TRUE16-NEXT:    scratch_load_b32 v38, off, s32 offset:116
+; GFX11TRUE16-NEXT:    scratch_load_b32 v39, off, s32 offset:52
+; GFX11TRUE16-NEXT:    scratch_load_b32 v48, off, s32 offset:112
+; GFX11TRUE16-NEXT:    scratch_load_b32 v49, off, s32 offset:48
+; GFX11TRUE16-NEXT:    scratch_load_b32 v50, off, s32 offset:108
+; GFX11TRUE16-NEXT:    scratch_load_b32 v51, off, s32 offset:44
+; GFX11TRUE16-NEXT:    scratch_load_b32 v52, off, s32 offset:104
+; GFX11TRUE16-NEXT:    scratch_load_b32 v53, off, s32 offset:40
+; GFX11TRUE16-NEXT:    scratch_load_b32 v54, off, s32 offset:100
+; GFX11TRUE16-NEXT:    scratch_load_b32 v55, off, s32 offset:36
+; GFX11TRUE16-NEXT:    scratch_load_b32 v64, off, s32 offset:96
+; GFX11TRUE16-NEXT:    scratch_load_b32 v65, off, s32 offset:32
+; GFX11TRUE16-NEXT:    scratch_load_b32 v66, off, s32 offset:92
+; GFX11TRUE16-NEXT:    scratch_load_b32 v67, off, s32 offset:28
+; GFX11TRUE16-NEXT:    scratch_load_b32 v68, off, s32 offset:88
+; GFX11TRUE16-NEXT:    scratch_load_b32 v69, off, s32 offset:24
+; GFX11TRUE16-NEXT:    scratch_load_b32 v70, off, s32 offset:84
+; GFX11TRUE16-NEXT:    scratch_load_b32 v71, off, s32 offset:20
+; GFX11TRUE16-NEXT:    scratch_load_b32 v80, off, s32 offset:80
+; GFX11TRUE16-NEXT:    scratch_load_b32 v81, off, s32 offset:16
+; GFX11TRUE16-NEXT:    scratch_load_b32 v82, off, s32 offset:76
+; GFX11TRUE16-NEXT:    scratch_load_b32 v83, off, s32 offset:12
+; GFX11TRUE16-NEXT:    scratch_load_b32 v84, off, s32 offset:72
+; GFX11TRUE16-NEXT:    scratch_load_b32 v85, off, s32 offset:8
+; GFX11TRUE16-NEXT:    scratch_load_b32 v86, off, s32 offset:68
+; GFX11TRUE16-NEXT:    scratch_load_b32 v87, off, s32 offset:4
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(31)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v96.l, v32.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(30)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v97.l, v33.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(29)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v98.l, v34.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(28)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v99.l, v35.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(27)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v100.l, v36.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(26)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v101.l, v37.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(25)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v102.l, v38.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(24)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v103.l, v39.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(23)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v112.l, v48.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(22)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v113.l, v49.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(21)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v114.l, v50.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(20)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v115.l, v51.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(19)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v116.l, v52.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(18)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v117.l, v53.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(17)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v118.l, v54.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(16)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v119.l, v55.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(15)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v128.l, v64.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(14)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v129.l, v65.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(13)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v130.l, v66.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(12)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v131.l, v67.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(11)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v132.l, v68.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(10)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v133.l, v69.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(9)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v134.l, v70.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(8)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v135.l, v71.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v71, 16, v71
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v70, 16, v70
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v30, 1, v30
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(5)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v146.l, v82.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(4)
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v147.l, v83.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v83, 16, v83
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v82, 16, v82
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v28, 1, v28
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v30
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(3)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v30.l, v84.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v84, 16, v84
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v26, 1, v26
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v96, v96, v97, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v28
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v97.l, v85.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v85, 16, v85
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v24, 1, v24
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v98, v98, v99, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v26
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v7, 1, v7
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v144.l, v80.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e64 v145.l, v81.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v81, 16, v81
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v26, v100, v101, vcc_lo
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v80, 16, v80
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v22, 1, v22
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v24
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v5, 1, v5
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v69, 16, v69
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v68, 16, v68
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v20, 1, v20
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v24, v102, v103, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v22
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v11, 1, v11
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v67, 16, v67
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v66, 16, v66
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v18, 1, v18
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v22, v112, v113, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v20
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v9, 1, v9
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v65, 16, v65
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v64, 16, v64
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v16, 1, v16
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v20, v114, v115, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v18
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v15, 1, v15
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v55, 16, v55
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v54, 16, v54
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v14, 1, v14
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v18, v116, v117, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v13, 1, v13
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v53, 16, v53
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v52, 16, v52
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v12, 1, v12
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v16, v118, v119, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v19, 1, v19
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v51, 16, v51
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v50, 16, v50
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v10, 1, v10
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v14, v128, v129, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v17, 1, v17
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v49, 16, v49
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v48, 16, v48
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v8, 1, v8
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v12, v130, v131, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v23, 1, v23
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v39, 16, v39
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v38, 16, v38
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v10, v132, v133, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v21, 1, v21
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v37, 16, v37
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v36, 16, v36
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v8, v134, v135, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v27, 1, v27
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v35, 16, v35
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v34
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v6, v144, v145, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v25, 1, v25
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v33
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v32
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v4, v146, v147, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v31, 1, v31
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(1)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v28.l, v86.l
+; GFX11TRUE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v99.l, v87.l
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v87, 16, v87
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v2, v30, v97, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11TRUE16-NEXT:    v_and_b32_e32 v29, 1, v29
+; GFX11TRUE16-NEXT:    v_lshrrev_b32_e32 v86, 16, v86
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v30.l, v84.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v84.l, v85.l
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v0, v28, v99, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v31
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v28.l, v86.l
+; GFX11TRUE16-NEXT:    v_mov_b16_e32 v85.l, v87.l
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v31, v32, v33, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v29
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v29, v34, v35, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v27
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v27, v36, v37, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v25
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v25, v38, v39, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v23
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v23, v48, v49, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v21
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v21, v50, v51, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v19
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v19, v52, v53, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v17
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v17, v54, v55, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v15, v64, v65, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v13, v66, v67, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v11, v68, v69, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v7, v80, v81, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v3, v30, v84, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v1, v28, v85, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX11TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11TRUE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v5, v82, v83, vcc_lo
+; GFX11TRUE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX11TRUE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX11TRUE16-NEXT:    v_cndmask_b32_e32 v9, v70, v71, vcc_lo
+; GFX11TRUE16-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v10, v21, v20, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v11, v23, v22, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v8, v17, v16, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v9, v19, v18, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v12, v25, v24, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v13, v27, v26, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v14, v29, v98, 0x5040100
+; GFX11TRUE16-NEXT:    v_perm_b32 v15, v31, v96, 0x5040100
+; GFX11TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11FAKE16-LABEL: v_vselect_v32bf16:
+; GFX11FAKE16:       ; %bb.0:
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11FAKE16-NEXT:    s_clause 0x1f
+; GFX11FAKE16-NEXT:    scratch_load_u16 v31, off, s32
+; GFX11FAKE16-NEXT:    scratch_load_b32 v32, off, s32 offset:128
+; GFX11FAKE16-NEXT:    scratch_load_b32 v33, off, s32 offset:64
+; GFX11FAKE16-NEXT:    scratch_load_b32 v34, off, s32 offset:124
+; GFX11FAKE16-NEXT:    scratch_load_b32 v35, off, s32 offset:60
+; GFX11FAKE16-NEXT:    scratch_load_b32 v36, off, s32 offset:120
+; GFX11FAKE16-NEXT:    scratch_load_b32 v37, off, s32 offset:56
+; GFX11FAKE16-NEXT:    scratch_load_b32 v38, off, s32 offset:116
+; GFX11FAKE16-NEXT:    scratch_load_b32 v39, off, s32 offset:52
+; GFX11FAKE16-NEXT:    scratch_load_b32 v48, off, s32 offset:112
+; GFX11FAKE16-NEXT:    scratch_load_b32 v49, off, s32 offset:48
+; GFX11FAKE16-NEXT:    scratch_load_b32 v50, off, s32 offset:108
+; GFX11FAKE16-NEXT:    scratch_load_b32 v51, off, s32 offset:44
+; GFX11FAKE16-NEXT:    scratch_load_b32 v52, off, s32 offset:104
+; GFX11FAKE16-NEXT:    scratch_load_b32 v53, off, s32 offset:40
+; GFX11FAKE16-NEXT:    scratch_load_b32 v54, off, s32 offset:100
+; GFX11FAKE16-NEXT:    scratch_load_b32 v55, off, s32 offset:36
+; GFX11FAKE16-NEXT:    scratch_load_b32 v64, off, s32 offset:96
+; GFX11FAKE16-NEXT:    scratch_load_b32 v65, off, s32 offset:32
+; GFX11FAKE16-NEXT:    scratch_load_b32 v66, off, s32 offset:92
+; GFX11FAKE16-NEXT:    scratch_load_b32 v67, off, s32 offset:28
+; GFX11FAKE16-NEXT:    scratch_load_b32 v68, off, s32 offset:88
+; GFX11FAKE16-NEXT:    scratch_load_b32 v69, off, s32 offset:24
+; GFX11FAKE16-NEXT:    scratch_load_b32 v70, off, s32 offset:84
+; GFX11FAKE16-NEXT:    scratch_load_b32 v71, off, s32 offset:20
+; GFX11FAKE16-NEXT:    scratch_load_b32 v80, off, s32 offset:80
+; GFX11FAKE16-NEXT:    scratch_load_b32 v81, off, s32 offset:16
+; GFX11FAKE16-NEXT:    scratch_load_b32 v82, off, s32 offset:76
+; GFX11FAKE16-NEXT:    scratch_load_b32 v83, off, s32 offset:12
+; GFX11FAKE16-NEXT:    scratch_load_b32 v84, off, s32 offset:72
+; GFX11FAKE16-NEXT:    scratch_load_b32 v85, off, s32 offset:8
+; GFX11FAKE16-NEXT:    scratch_load_b32 v86, off, s32 offset:68
+; GFX11FAKE16-NEXT:    scratch_load_b32 v87, off, s32 offset:4
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v30, 1, v30
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v28, 1, v28
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v26, 1, v26
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v24, 1, v24
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v22, 1, v22
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v30
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v3, 1, v3
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v20, 1, v20
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v18, 1, v18
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v16, 1, v16
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(30)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v30, v32, v33, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v28
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v1, 1, v1
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v33, 16, v33
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v32, 16, v32
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v0, 1, v0
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(28)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v28, v34, v35, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v26
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v7, 1, v7
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v35, 16, v35
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v34, 16, v34
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v2, 1, v2
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(26)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v26, v36, v37, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v24
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v5, 1, v5
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v37, 16, v37
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v36, 16, v36
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v4, 1, v4
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(24)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v24, v38, v39, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v22
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v11, 1, v11
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v39, 16, v39
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v38, 16, v38
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v6, 1, v6
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(22)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v22, v48, v49, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v20
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v9, 1, v9
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v49, 16, v49
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v48, 16, v48
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v8, 1, v8
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(20)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v20, v50, v51, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v18
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v15, 1, v15
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v51, 16, v51
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v50, 16, v50
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v10, 1, v10
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(18)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v18, v52, v53, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v16
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v13, 1, v13
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v53, 16, v53
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v52, 16, v52
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v12, 1, v12
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(16)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v16, v54, v55, vcc_lo
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v55, 16, v55
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v54, 16, v54
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v14, 1, v14
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(14)
+; GFX11FAKE16-NEXT:    v_dual_cndmask_b32 v14, v64, v65 :: v_dual_and_b32 v19, 1, v19
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v17, 1, v17
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v65, 16, v65
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v64, 16, v64
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(12)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v12, v66, v67, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v23, 1, v23
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v67, 16, v67
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v66, 16, v66
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(10)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v10, v68, v69, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v21, 1, v21
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v69, 16, v69
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v68, 16, v68
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(8)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v8, v70, v71, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v27, 1, v27
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v71, 16, v71
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v70, 16, v70
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(6)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v6, v80, v81, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v25, 1, v25
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v81, 16, v81
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v80, 16, v80
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(4)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v4, v82, v83, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v31, 1, v31
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v83, 16, v83
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v82, 16, v82
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(2)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v2, v84, v85, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX11FAKE16-NEXT:    v_and_b32_e32 v29, 1, v29
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v85, 16, v85
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v84, 16, v84
+; GFX11FAKE16-NEXT:    s_waitcnt vmcnt(0)
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v0, v86, v87, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v31
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v87, 16, v87
+; GFX11FAKE16-NEXT:    v_lshrrev_b32_e32 v86, 16, v86
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v31, v32, v33, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v29
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v29, v34, v35, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v27
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v27, v36, v37, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v25
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v25, v38, v39, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v23
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v23, v48, v49, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v21
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v21, v50, v51, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v19
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v19, v52, v53, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v17
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v17, v54, v55, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v15, v64, v65, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v13, v66, v67, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v11, v68, v69, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v7, v80, v81, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v3, v84, v85, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v1, v86, v87, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX11FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11FAKE16-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v5, v82, v83, vcc_lo
+; GFX11FAKE16-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX11FAKE16-NEXT:    v_perm_b32 v1, v3, v2, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v3, v7, v6, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v6, v13, v12, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v2, v5, v4, 0x5040100
+; GFX11FAKE16-NEXT:    v_cndmask_b32_e32 v9, v70, v71, vcc_lo
+; GFX11FAKE16-NEXT:    v_perm_b32 v5, v11, v10, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v7, v15, v14, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v10, v21, v20, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v11, v23, v22, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v4, v9, v8, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v8, v17, v16, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v9, v19, v18, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v12, v25, v24, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v13, v27, v26, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v14, v29, v28, 0x5040100
+; GFX11FAKE16-NEXT:    v_perm_b32 v15, v31, v30, 0x5040100
+; GFX11FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %op = select <32 x i1> %cond, <32 x bfloat> %a, <32 x bfloat> %b
   ret <32 x bfloat> %op
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
index 9fe7544003568c..1094b768f1bd16 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -100,9 +100,7 @@ define amdgpu_kernel void @fadd_f16(
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-GISEL-NEXT:    buffer_load_u16 v1, off, s[0:3], 0 glc dlc
 ; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b16_e32 v0.h, v1.l
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h
+; GFX11-GISEL-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
 ; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
 ; GFX11-GISEL-NEXT:    s_nop 0
 ; GFX11-GISEL-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
index 5cf457d1753b30..4faa482ede59f7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
@@ -165,12 +165,10 @@ define amdgpu_kernel void @ceil_v2f16(
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX11-NEXT:    v_ceil_f16_e32 v0.l, v0.l
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-NEXT:    v_ceil_f16_e32 v0.h, v1.l
 ; GFX11-NEXT:    v_mov_b16_e32 v1.l, v0.l
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_ceil_f16_e32 v0.h, v0.h
 ; GFX11-NEXT:    v_mov_b16_e32 v0.l, v0.h
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
 ; GFX11-NEXT:    s_nop 0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
index ece55c7f7dceaa..61f6c9f7f0e6ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
@@ -166,12 +166,10 @@ define amdgpu_kernel void @floor_v2f16(
 ; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 ; GFX11-NEXT:    v_floor_f16_e32 v0.l, v0.l
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-NEXT:    v_floor_f16_e32 v0.h, v1.l
 ; GFX11-NEXT:    v_mov_b16_e32 v1.l, v0.l
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_floor_f16_e32 v0.h, v0.h
 ; GFX11-NEXT:    v_mov_b16_e32 v0.l, v0.h
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX11-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
 ; GFX11-NEXT:    s_nop 0

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index b2b5153bb6c2a2..72e86f1f6f9992 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -2,12 +2,14 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG-TRUE16 %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG-FAKE16 %s
 
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL-TRUE16 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL-FAKE16 %s
 
 define float @test_ldexp_f32_i32(ptr addrspace(1) %out, float %a, i32 %b) {
 ; GFX6-LABEL: test_ldexp_f32_i32:
@@ -211,13 +213,22 @@ define half @test_ldexp_f16_i8(half %a, i8 %b) {
 ; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v0, v0, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_f16_i8:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i8:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i8:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_f16_i8:
 ; GFX6-GISEL:       ; %bb.0:
@@ -248,15 +259,25 @@ define half @test_ldexp_f16_i8(half %a, i8 %b) {
 ; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v1
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_f16_i8:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 8
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fff
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v2
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i8:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i8:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_bfe_i32 v1, v1, 0, 8
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call half @llvm.ldexp.f16.i8(half %a, i8 %b)
   ret half %result
 }
@@ -283,11 +304,19 @@ define half @test_ldexp_f16_i16(half %a, i16 %b) {
 ; GFX9-NEXT:    v_ldexp_f16_e32 v0, v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-LABEL: test_ldexp_f16_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i16:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i16:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_f16_i16:
 ; GFX6-GISEL:       ; %bb.0:
@@ -297,6 +326,18 @@ define half @test_ldexp_f16_i16(half %a, i16 %b) {
 ; GFX6-GISEL-NEXT:    v_ldexp_f32_e32 v0, v0, v1
 ; GFX6-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i16:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i16:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call half @llvm.ldexp.f16.i16(half %a, i16 %b)
   ret half %result
 }
@@ -328,14 +369,23 @@ define half @test_ldexp_f16_i32(half %a, i32 %b) {
 ; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v1
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_med3_i32 v1, v1, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_f16_i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_f16_i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_f16_i32:
 ; GFX6-GISEL:       ; %bb.0:
@@ -363,14 +413,23 @@ define half @test_ldexp_f16_i32(half %a, i32 %b) {
 ; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v1
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, 0x7fff
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v2
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_f16_i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_f16_i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v2, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call half @llvm.ldexp.f16.i32(half %a, i32 %b)
   ret half %result
 }
@@ -411,19 +470,36 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
 ; GFX9-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-SDAG-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v1, v1, s0, 0x7fff
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v2, v3, v2
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v1, v1, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v3, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i32:
 ; GFX6-GISEL:       ; %bb.0:
@@ -460,21 +536,40 @@ define <2 x half> @test_ldexp_v2f16_v2i32(<2 x half> %a, <2 x i32> %b) {
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v3, 0x7fff
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v3
-; GFX11-GISEL-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v3
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v4, v2
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v3
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v4.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v2f16_v2i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v3, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v1, 0xffff8000, v1, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v4, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b)
   ret <2 x half> %result
 }
@@ -509,16 +604,30 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
 ; GFX9-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i16:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v2, v3, v2
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v3.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v0, v1, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v3, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_v2f16_v2i16:
 ; GFX6-GISEL:       ; %bb.0:
@@ -549,18 +658,34 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i16:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v2, v3
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v2.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v2f16_v2i16:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v2, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b)
   ret <2 x half> %result
 }
@@ -608,21 +733,40 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
 ; GFX9-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v3
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-SDAG-NEXT:    v_med3_i32 v3, v3, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v3, v5, v3
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT:    v_med3_i32 v2, v4, s0, 0x7fff
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v3
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v1, v2
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v5.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v2, v4, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v0, v3, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v3, v5, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v2, v4, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i32:
 ; GFX6-GISEL:       ; %bb.0:
@@ -666,23 +810,44 @@ define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v5, 0x7fff
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v5
-; GFX11-GISEL-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v5
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v6, v3
-; GFX11-GISEL-NEXT:    v_med3_i32 v3, 0xffff8000, v4, v5
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v1, v3
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v5, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v5
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v5
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v6.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v3, 0xffff8000, v4, v5
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v5, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v5
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v5
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v6, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v3, 0xffff8000, v4, v5
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b)
   ret <3 x half> %result
 }
@@ -723,17 +888,32 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
 ; GFX9-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v4
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i16:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v1, v3
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v2, v5, v4
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v5.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v5, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i16:
 ; GFX6-GISEL:       ; %bb.0:
@@ -770,19 +950,36 @@ define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v4
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i16:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v1, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v4, v5
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v4.l, v5.l
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v4, v5
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> %a, <3 x i16> %b)
   ret <3 x half> %result
 }
@@ -839,26 +1036,53 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
 ; GFX9-SDAG-NEXT:    v_pack_b32_f16 v1, v1, v5
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
-; GFX11-SDAG-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v3, v3, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX11-SDAG-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v5, v6, v5
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v3, v7, v3
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v1, v4
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v3
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v1, v1, v5
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v6.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v1.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.h, v7.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v1, v3, v1
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    s_movk_i32 s0, 0x8000
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v5, v6, v5
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v3, v7, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v4
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v1, v1, v5
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i32:
 ; GFX6-GISEL:       ; %bb.0:
@@ -911,30 +1135,61 @@ define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v3
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mov_b32_e32 v6, 0x7fff
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v6
-; GFX11-GISEL-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v6
-; GFX11-GISEL-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v6
-; GFX11-GISEL-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v6
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v1, v4
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v7, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v3, v8, v5
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b32_e32 v6, 0x7fff
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v6
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v6
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v6
+; GFX11-GISEL-TRUE16-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v6
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v1.l, v4.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v7.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.h, v8.l, v5.l
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v1.h
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v4, 0xffff, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v1, v1, 16, v4
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_mov_b32_e32 v6, 0x7fff
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v2, 0xffff8000, v2, v6
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v6
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v3, 0xffff8000, v3, v6
+; GFX11-GISEL-FAKE16-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v6
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v4
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v7, v3
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v3, v8, v5
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b)
   ret <4 x half> %result
 }
@@ -983,22 +1238,45 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
 ; GFX9-SDAG-NEXT:    v_pack_b32_f16 v1, v1, v4
 ; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i16:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v1, v3
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v2, v6, v5
-; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v3, v7, v4
-; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v0, v0, v2
-; GFX11-SDAG-NEXT:    v_pack_b32_f16 v1, v1, v3
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-SDAG-TRUE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-SDAG-TRUE16:       ; %bb.0:
+; GFX11-SDAG-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v1.l, v3.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v6.l, v5.l
+; GFX11-SDAG-TRUE16-NEXT:    v_ldexp_f16_e32 v1.h, v7.l, v4.l
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v0.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l
+; GFX11-SDAG-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v1.h
+; GFX11-SDAG-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v0, v2, v0
+; GFX11-SDAG-TRUE16-NEXT:    v_pack_b32_f16 v1, v3, v1
+; GFX11-SDAG-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-SDAG-FAKE16:       ; %bb.0:
+; GFX11-SDAG-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-SDAG-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v6, v5
+; GFX11-SDAG-FAKE16-NEXT:    v_ldexp_f16_e32 v3, v7, v4
+; GFX11-SDAG-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-FAKE16-NEXT:    v_pack_b32_f16 v1, v1, v3
+; GFX11-SDAG-FAKE16-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i16:
 ; GFX6-GISEL:       ; %bb.0:
@@ -1043,25 +1321,51 @@ define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
 ; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i16:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v0, v2
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v1, v3
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v4, v6
-; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v3, v5, v7
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
-; GFX11-GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GFX11-GISEL-TRUE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-GISEL-TRUE16:       ; %bb.0:
+; GFX11-GISEL-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-GISEL-TRUE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.l, v0.l, v2.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v0.h, v1.l, v3.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.l, v4.l, v6.l
+; GFX11-GISEL-TRUE16-NEXT:    v_ldexp_f16_e32 v1.h, v5.l, v7.l
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v2.l, v0.l
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v0.h
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v3.l, v1.l
+; GFX11-GISEL-TRUE16-NEXT:    v_mov_b16_e32 v1.l, v1.h
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX11-GISEL-TRUE16-NEXT:    v_and_b32_e32 v4, 0xffff, v0
+; GFX11-GISEL-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; GFX11-GISEL-TRUE16-NEXT:    v_lshl_or_b32 v1, v1, 16, v4
+; GFX11-GISEL-TRUE16-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-FAKE16-LABEL: test_ldexp_v4f16_v4i16:
+; GFX11-GISEL-FAKE16:       ; %bb.0:
+; GFX11-GISEL-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v2, v4, v6
+; GFX11-GISEL-FAKE16-NEXT:    v_ldexp_f16_e32 v3, v5, v7
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-FAKE16-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
+; GFX11-GISEL-FAKE16-NEXT:    s_setpc_b64 s[30:31]
   %result = call <4 x half> @llvm.ldexp.v4f16.v4i16(<4 x half> %a, <4 x i16> %b)
   ret <4 x half> %result
 }

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
index 84ced2fd224997..26ffd3a4e383b2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
@@ -783,49 +783,64 @@
 # GFX11: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf]
 0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf
 
-# GFX11: v_ldexp_f16_e32 v5, v1, v2              ; encoding: [0x01,0x05,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, v1, v2              ; encoding: [0x01,0x05,0x0a,0x76]
+# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l        ; encoding: [0x01,0x05,0x0a,0x76]
 0x01,0x05,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, v127, v2            ; encoding: [0x7f,0x05,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, v127, v2            ; encoding: [0x7f,0x05,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, v127.l, v2.l      ; encoding: [0x7f,0x05,0x0a,0x76]
 0x7f,0x05,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, s1, v2              ; encoding: [0x01,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, s1, v2              ; encoding: [0x01,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, s1, v2.l          ; encoding: [0x01,0x04,0x0a,0x76]
 0x01,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, s105, v2            ; encoding: [0x69,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, s105, v2            ; encoding: [0x69,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, s105, v2.l        ; encoding: [0x69,0x04,0x0a,0x76]
 0x69,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, vcc_lo, v2          ; encoding: [0x6a,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2          ; encoding: [0x6a,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, vcc_lo, v2.l      ; encoding: [0x6a,0x04,0x0a,0x76]
 0x6a,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, vcc_hi, v2          ; encoding: [0x6b,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_hi, v2          ; encoding: [0x6b,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, vcc_hi, v2.l      ; encoding: [0x6b,0x04,0x0a,0x76]
 0x6b,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, ttmp15, v2          ; encoding: [0x7b,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2          ; encoding: [0x7b,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, ttmp15, v2.l      ; encoding: [0x7b,0x04,0x0a,0x76]
 0x7b,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, m0, v2              ; encoding: [0x7d,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, m0, v2              ; encoding: [0x7d,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, m0, v2.l          ; encoding: [0x7d,0x04,0x0a,0x76]
 0x7d,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, exec_lo, v2         ; encoding: [0x7e,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2         ; encoding: [0x7e,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, exec_lo, v2.l     ; encoding: [0x7e,0x04,0x0a,0x76]
 0x7e,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, exec_hi, v2         ; encoding: [0x7f,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2         ; encoding: [0x7f,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, exec_hi, v2.l     ; encoding: [0x7f,0x04,0x0a,0x76]
 0x7f,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, null, v2            ; encoding: [0x7c,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, null, v2            ; encoding: [0x7c,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, null, v2.l        ; encoding: [0x7c,0x04,0x0a,0x76]
 0x7c,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, -1, v2              ; encoding: [0xc1,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, -1, v2              ; encoding: [0xc1,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, -1, v2.l          ; encoding: [0xc1,0x04,0x0a,0x76]
 0xc1,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, 0.5, v2             ; encoding: [0xf0,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2             ; encoding: [0xf0,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, 0.5, v2.l         ; encoding: [0xf0,0x04,0x0a,0x76]
 0xf0,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v5, src_scc, v2         ; encoding: [0xfd,0x04,0x0a,0x76]
+# GFX11-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2         ; encoding: [0xfd,0x04,0x0a,0x76]
+# GFX11-REAL16:	v_ldexp_f16_e32 v5.l, src_scc, v2.l     ; encoding: [0xfd,0x04,0x0a,0x76]
 0xfd,0x04,0x0a,0x76
 
-# GFX11: v_ldexp_f16_e32 v127, 0xfe0b, v127      ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00]
+# GFX11-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127      ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00]
+# GFX11-REAL16:	v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l  ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00]
 0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00
 
 # GFX11: v_lshlrev_b32_e32 v5, v1, v2            ; encoding: [0x01,0x05,0x0a,0x30]


        


More information about the llvm-commits mailing list