[llvm] [AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` (PR #139185)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Thu May 8 21:46:17 PDT 2025


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/139185

>From 6bb5d6723ade28f8d4c0581e3cd3612b22552c27 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 9 May 2025 00:32:43 -0400
Subject: [PATCH] [AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and
 `v_cvt_f32_f16`

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  16 +-
 llvm/lib/Target/AMDGPU/VOP1Instructions.td    |  14 +-
 .../inst-select-amdgcn.fcmp.constants.w32.mir |   8 +-
 .../inst-select-amdgcn.fcmp.constants.w64.mir |   8 +-
 .../AMDGPU/GlobalISel/inst-select-fptosi.mir  |  12 +-
 .../AMDGPU/GlobalISel/inst-select-fptoui.mir  |  12 +-
 .../AMDGPU/GlobalISel/inst-select-sitofp.mir  |   4 +-
 .../AMDGPU/GlobalISel/inst-select-uitofp.mir  |   4 +-
 .../AMDGPU/fix-sgpr-copies-f16-fake16.mir     |   2 +-
 ...schedule-regpressure-ilp-metric-spills.mir | 256 +++++++++---------
 .../AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s  |   6 +
 .../AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s  |   6 +
 .../gfx11_dasm_vop3_dpp16_from_vop1.txt       |   4 +-
 .../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt |   4 +-
 .../AMDGPU/gfx11_dasm_vop3_from_vop1.txt      |   4 +-
 .../AMDGPU/gfx12_dasm_vop3_from_vop1.txt      |   4 +-
 .../gfx12_dasm_vop3_from_vop1_dpp16.txt       |   4 +-
 .../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt |   4 +-
 18 files changed, 198 insertions(+), 174 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a90dace47fb2..4ec062916b29f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7658,11 +7658,17 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
       BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
           .addImm(16)
           .add(Inst.getOperand(1));
-      BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
-          .addImm(0) // src0_modifiers
-          .addReg(TmpReg)
-          .addImm(0)  // clamp
-          .addImm(0); // omod
+      const MachineInstrBuilder &MIB =
+          BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
+              .addImm(0) // src0_modifiers
+              .addReg(TmpReg)
+              .addImm(0)  // clamp
+              .addImm(0); // omod
+      // FIXME: This is a temporary workaround to support opsel for certain
+      // fake16 instructions. Remove this code once the related instructions
+      // have true16 support.
+      if (NewOpcode == AMDGPU::V_CVT_F32_F16_fake16_e64)
+        MIB.addImm(0); // op_sel0
     }
 
     MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 352a3f9c2d27f..5522d89855332 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -261,6 +261,11 @@ foreach vt = Reg32Types.types in {
   >;
 }
 
+let HasOpSel = 1 in {
+  def VOP_F16_F32_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F16_F32>;
+  def VOP_F32_F16_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F32_F16>;
+} // End HasOpSel = 1
+
 let isReMaterializable = 1 in {
 let SchedRW = [WriteDoubleCvt] in {
 // OMod clears exceptions when set in this instruction
@@ -301,15 +306,16 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
     defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
   let OtherPredicates = [UseRealTrue16Insts] in
     defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
-  let OtherPredicates = [UseFakeTrue16Insts] in
-    defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
+  let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F16_F32_fake16
+      : VOP1Inst<"v_cvt_f16_f32_fake16", VOP_F16_F32_Fake16_OP_SEL,
+                 any_fpround>;
 } // End FPDPRounding = 1, isReMaterializable = 0
 let OtherPredicates = [NotHasTrue16BitInsts] in
   defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
 let OtherPredicates = [UseRealTrue16Insts] in
   defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
-let OtherPredicates = [UseFakeTrue16Insts] in
-  defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
+let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F32_F16_fake16
+    : VOP1Inst<"v_cvt_f32_f16_fake16", VOP_F32_F16_Fake16_OP_SEL, any_fpextend>;
 
 let SubtargetPredicate = HasBF16ConversionInsts in
 defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 49383135ab0c5..66c8d11bfcc9e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -26,8 +26,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 828eb5d3fb40a..c7715eec76d37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -26,8 +26,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
index 03cb907f82a16..2c7eb23dab364 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -149,7 +149,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -196,7 +196,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -251,7 +251,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -301,7 +301,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -350,7 +350,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -407,7 +407,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index 521a0e8a2a796..489a6b360124f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -99,7 +99,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -146,7 +146,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -201,7 +201,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -251,7 +251,7 @@ body: |
     ; GFX11-FAKE16: liveins: $vgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
@@ -300,7 +300,7 @@ body: |
     ; GFX11-FAKE16: liveins: $sgpr0
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:sgpr(s32) = COPY $sgpr0
@@ -357,7 +357,7 @@ body: |
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
     ; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
index 3888ce87b46fd..72e8fc52917a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
@@ -101,7 +101,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_SITOFP %0
@@ -150,7 +150,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s16) = G_SITOFP %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
index 35d622dc57d18..1e3c4c8e596b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
@@ -115,7 +115,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_UITOFP %0
@@ -164,7 +164,7 @@ body: |
     ; GFX11-FAKE16-NEXT: {{  $}}
     ; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s16) = G_UITOFP %0
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
index 5d90bab1384eb..0427b741d2456 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -33,7 +33,7 @@ body:             |
     ; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
     ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
     ; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_fake16_e64_]], implicit $exec
-    ; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
     %2:sreg_32 = COPY %1:vgpr_32
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
index aa0d1fe45e9a8..7ff3788096303 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
@@ -424,14 +424,14 @@ body:             |
     %264:vgpr_32 = V_LSHL_OR_B32_e64 %254, 8, %263, implicit $exec
     %265:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %266:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %267:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub0, 0, 0, implicit $mode, implicit $exec
-    %268:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub1, 0, 0, implicit $mode, implicit $exec
-    %269:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub2, 0, 0, implicit $mode, implicit $exec
-    %270:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub3, 0, 0, implicit $mode, implicit $exec
-    %271:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub0, 0, 0, implicit $mode, implicit $exec
-    %272:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub1, 0, 0, implicit $mode, implicit $exec
-    %273:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub2, 0, 0, implicit $mode, implicit $exec
-    %274:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub3, 0, 0, implicit $mode, implicit $exec
+    %267:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %268:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %269:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %270:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %271:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %272:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %273:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %274:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %275.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %273, 0, %274, 0, 0, implicit $mode, implicit $exec
     %275.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %271, 0, %272, 0, 0, implicit $mode, implicit $exec
     %275.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %269, 0, %270, 0, 0, implicit $mode, implicit $exec
@@ -446,14 +446,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %212.sub6, %212.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %277:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %278:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %279:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub0, 0, 0, implicit $mode, implicit $exec
-    %280:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub1, 0, 0, implicit $mode, implicit $exec
-    %281:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub2, 0, 0, implicit $mode, implicit $exec
-    %282:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub3, 0, 0, implicit $mode, implicit $exec
-    %283:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub0, 0, 0, implicit $mode, implicit $exec
-    %284:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub1, 0, 0, implicit $mode, implicit $exec
-    %285:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub2, 0, 0, implicit $mode, implicit $exec
-    %286:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub3, 0, 0, implicit $mode, implicit $exec
+    %279:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %280:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %281:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %282:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %283:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %284:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %285:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %286:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %287.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %285, 0, %286, 0, 0, implicit $mode, implicit $exec
     %287.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %283, 0, %284, 0, 0, implicit $mode, implicit $exec
     %287.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %281, 0, %282, 0, 0, implicit $mode, implicit $exec
@@ -465,14 +465,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %246.sub6, %246.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %288:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %289:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %290:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub0, 0, 0, implicit $mode, implicit $exec
-    %291:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub1, 0, 0, implicit $mode, implicit $exec
-    %292:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub2, 0, 0, implicit $mode, implicit $exec
-    %293:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub3, 0, 0, implicit $mode, implicit $exec
-    %294:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub0, 0, 0, implicit $mode, implicit $exec
-    %295:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub1, 0, 0, implicit $mode, implicit $exec
-    %296:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub2, 0, 0, implicit $mode, implicit $exec
-    %297:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub3, 0, 0, implicit $mode, implicit $exec
+    %290:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %291:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %292:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %293:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %294:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %295:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %296:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %297:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %298.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %296, 0, %297, 0, 0, implicit $mode, implicit $exec
     %298.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %294, 0, %295, 0, 0, implicit $mode, implicit $exec
     %298.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %292, 0, %293, 0, 0, implicit $mode, implicit $exec
@@ -485,14 +485,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %250.sub6, %250.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %300:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %301:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %302:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub0, 0, 0, implicit $mode, implicit $exec
-    %303:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub1, 0, 0, implicit $mode, implicit $exec
-    %304:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub2, 0, 0, implicit $mode, implicit $exec
-    %305:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub3, 0, 0, implicit $mode, implicit $exec
-    %306:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub0, 0, 0, implicit $mode, implicit $exec
-    %307:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub1, 0, 0, implicit $mode, implicit $exec
-    %308:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub2, 0, 0, implicit $mode, implicit $exec
-    %309:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub3, 0, 0, implicit $mode, implicit $exec
+    %302:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %303:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %304:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %305:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %306:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %307:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %308:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %309:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %310.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %308, 0, %309, 0, 0, implicit $mode, implicit $exec
     %310.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %306, 0, %307, 0, 0, implicit $mode, implicit $exec
     %310.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %304, 0, %305, 0, 0, implicit $mode, implicit $exec
@@ -506,14 +506,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %253.sub6, %253.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %313:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %314:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %315:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub0, 0, 0, implicit $mode, implicit $exec
-    %316:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub1, 0, 0, implicit $mode, implicit $exec
-    %317:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub2, 0, 0, implicit $mode, implicit $exec
-    %318:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub3, 0, 0, implicit $mode, implicit $exec
-    %319:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub0, 0, 0, implicit $mode, implicit $exec
-    %320:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub1, 0, 0, implicit $mode, implicit $exec
-    %321:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub2, 0, 0, implicit $mode, implicit $exec
-    %322:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub3, 0, 0, implicit $mode, implicit $exec
+    %315:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %316:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %317:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %318:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %319:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %320:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %321:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %322:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %323.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %321, 0, %322, 0, 0, implicit $mode, implicit $exec
     %323.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %319, 0, %320, 0, 0, implicit $mode, implicit $exec
     %323.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %317, 0, %318, 0, 0, implicit $mode, implicit $exec
@@ -526,14 +526,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %214.sub6, %214.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %325:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %326:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %327:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub0, 0, 0, implicit $mode, implicit $exec
-    %328:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub1, 0, 0, implicit $mode, implicit $exec
-    %329:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub2, 0, 0, implicit $mode, implicit $exec
-    %330:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub3, 0, 0, implicit $mode, implicit $exec
-    %331:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub0, 0, 0, implicit $mode, implicit $exec
-    %332:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub1, 0, 0, implicit $mode, implicit $exec
-    %333:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub2, 0, 0, implicit $mode, implicit $exec
-    %334:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub3, 0, 0, implicit $mode, implicit $exec
+    %327:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %328:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %329:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %330:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %331:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %332:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %333:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %334:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %335.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %333, 0, %334, 0, 0, implicit $mode, implicit $exec
     %335.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %331, 0, %332, 0, 0, implicit $mode, implicit $exec
     %335.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %329, 0, %330, 0, 0, implicit $mode, implicit $exec
@@ -546,14 +546,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %247.sub6, %247.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %337:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %338:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %339:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub0, 0, 0, implicit $mode, implicit $exec
-    %340:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub1, 0, 0, implicit $mode, implicit $exec
-    %341:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub2, 0, 0, implicit $mode, implicit $exec
-    %342:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub3, 0, 0, implicit $mode, implicit $exec
-    %343:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub0, 0, 0, implicit $mode, implicit $exec
-    %344:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub1, 0, 0, implicit $mode, implicit $exec
-    %345:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub2, 0, 0, implicit $mode, implicit $exec
-    %346:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub3, 0, 0, implicit $mode, implicit $exec
+    %339:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %340:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %341:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %342:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %343:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %344:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %345:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %346:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %347.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %345, 0, %346, 0, 0, implicit $mode, implicit $exec
     %347.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %343, 0, %344, 0, 0, implicit $mode, implicit $exec
     %347.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %341, 0, %342, 0, 0, implicit $mode, implicit $exec
@@ -567,14 +567,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %213.sub6, %213.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %350:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %351:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %352:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub0, 0, 0, implicit $mode, implicit $exec
-    %353:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub1, 0, 0, implicit $mode, implicit $exec
-    %354:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub2, 0, 0, implicit $mode, implicit $exec
-    %355:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub3, 0, 0, implicit $mode, implicit $exec
-    %356:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub0, 0, 0, implicit $mode, implicit $exec
-    %357:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub1, 0, 0, implicit $mode, implicit $exec
-    %358:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub2, 0, 0, implicit $mode, implicit $exec
-    %359:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub3, 0, 0, implicit $mode, implicit $exec
+    %352:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %353:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %354:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %355:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %356:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %357:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %358:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %359:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %360.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %358, 0, %359, 0, 0, implicit $mode, implicit $exec
     %360.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %356, 0, %357, 0, 0, implicit $mode, implicit $exec
     %360.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %354, 0, %355, 0, 0, implicit $mode, implicit $exec
@@ -588,14 +588,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %216.sub6, %216.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %363:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %364:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %365:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub0, 0, 0, implicit $mode, implicit $exec
-    %366:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub1, 0, 0, implicit $mode, implicit $exec
-    %367:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub2, 0, 0, implicit $mode, implicit $exec
-    %368:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub3, 0, 0, implicit $mode, implicit $exec
-    %369:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub0, 0, 0, implicit $mode, implicit $exec
-    %370:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub1, 0, 0, implicit $mode, implicit $exec
-    %371:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub2, 0, 0, implicit $mode, implicit $exec
-    %372:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub3, 0, 0, implicit $mode, implicit $exec
+    %365:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %366:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %367:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %368:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %369:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %370:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %371:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %372:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %373.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %371, 0, %372, 0, 0, implicit $mode, implicit $exec
     %373.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %369, 0, %370, 0, 0, implicit $mode, implicit $exec
     %373.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %367, 0, %368, 0, 0, implicit $mode, implicit $exec
@@ -608,14 +608,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %248.sub6, %248.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %375:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %376:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %377:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub0, 0, 0, implicit $mode, implicit $exec
-    %378:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub1, 0, 0, implicit $mode, implicit $exec
-    %379:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub2, 0, 0, implicit $mode, implicit $exec
-    %380:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub3, 0, 0, implicit $mode, implicit $exec
-    %381:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub0, 0, 0, implicit $mode, implicit $exec
-    %382:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub1, 0, 0, implicit $mode, implicit $exec
-    %383:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub2, 0, 0, implicit $mode, implicit $exec
-    %384:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub3, 0, 0, implicit $mode, implicit $exec
+    %377:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %378:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %379:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %380:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %381:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %382:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %383:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %384:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %385.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %383, 0, %384, 0, 0, implicit $mode, implicit $exec
     %385.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %381, 0, %382, 0, 0, implicit $mode, implicit $exec
     %385.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %379, 0, %380, 0, 0, implicit $mode, implicit $exec
@@ -627,14 +627,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %217.sub6, %217.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %386:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %387:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %388:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub0, 0, 0, implicit $mode, implicit $exec
-    %389:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub1, 0, 0, implicit $mode, implicit $exec
-    %390:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub2, 0, 0, implicit $mode, implicit $exec
-    %391:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub3, 0, 0, implicit $mode, implicit $exec
-    %392:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub0, 0, 0, implicit $mode, implicit $exec
-    %393:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub1, 0, 0, implicit $mode, implicit $exec
-    %394:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub2, 0, 0, implicit $mode, implicit $exec
-    %395:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub3, 0, 0, implicit $mode, implicit $exec
+    %388:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %389:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %390:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %391:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %392:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %393:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %394:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %395:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %396.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %394, 0, %395, 0, 0, implicit $mode, implicit $exec
     %396.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %392, 0, %393, 0, 0, implicit $mode, implicit $exec
     %396.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %390, 0, %391, 0, 0, implicit $mode, implicit $exec
@@ -647,14 +647,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %251.sub6, %251.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %398:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %399:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %400:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub0, 0, 0, implicit $mode, implicit $exec
-    %401:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub1, 0, 0, implicit $mode, implicit $exec
-    %402:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub2, 0, 0, implicit $mode, implicit $exec
-    %403:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub3, 0, 0, implicit $mode, implicit $exec
-    %404:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub0, 0, 0, implicit $mode, implicit $exec
-    %405:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub1, 0, 0, implicit $mode, implicit $exec
-    %406:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub2, 0, 0, implicit $mode, implicit $exec
-    %407:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub3, 0, 0, implicit $mode, implicit $exec
+    %400:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %401:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %402:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %403:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %404:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %405:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %406:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %407:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %408.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %406, 0, %407, 0, 0, implicit $mode, implicit $exec
     %408.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %404, 0, %405, 0, 0, implicit $mode, implicit $exec
     %408.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %402, 0, %403, 0, 0, implicit $mode, implicit $exec
@@ -667,14 +667,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %252.sub6, %252.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %410:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %411:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %412:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub0, 0, 0, implicit $mode, implicit $exec
-    %413:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub1, 0, 0, implicit $mode, implicit $exec
-    %414:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub2, 0, 0, implicit $mode, implicit $exec
-    %415:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub3, 0, 0, implicit $mode, implicit $exec
-    %416:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub0, 0, 0, implicit $mode, implicit $exec
-    %417:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub1, 0, 0, implicit $mode, implicit $exec
-    %418:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub2, 0, 0, implicit $mode, implicit $exec
-    %419:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub3, 0, 0, implicit $mode, implicit $exec
+    %412:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %413:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %414:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %415:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %416:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %417:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %418:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %419:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %420.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %418, 0, %419, 0, 0, implicit $mode, implicit $exec
     %420.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %416, 0, %417, 0, 0, implicit $mode, implicit $exec
     %420.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %414, 0, %415, 0, 0, implicit $mode, implicit $exec
@@ -687,14 +687,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %220.sub6, %220.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %422:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %423:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %424:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub0, 0, 0, implicit $mode, implicit $exec
-    %425:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub1, 0, 0, implicit $mode, implicit $exec
-    %426:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub2, 0, 0, implicit $mode, implicit $exec
-    %427:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub3, 0, 0, implicit $mode, implicit $exec
-    %428:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub0, 0, 0, implicit $mode, implicit $exec
-    %429:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub1, 0, 0, implicit $mode, implicit $exec
-    %430:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub2, 0, 0, implicit $mode, implicit $exec
-    %431:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub3, 0, 0, implicit $mode, implicit $exec
+    %424:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %425:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %426:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %427:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %428:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %429:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %430:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %431:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %432.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %430, 0, %431, 0, 0, implicit $mode, implicit $exec
     %432.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %428, 0, %429, 0, 0, implicit $mode, implicit $exec
     %432.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %426, 0, %427, 0, 0, implicit $mode, implicit $exec
@@ -707,14 +707,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %249.sub6, %249.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %434:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %435:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %436:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub0, 0, 0, implicit $mode, implicit $exec
-    %437:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub1, 0, 0, implicit $mode, implicit $exec
-    %438:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub2, 0, 0, implicit $mode, implicit $exec
-    %439:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub3, 0, 0, implicit $mode, implicit $exec
-    %440:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub0, 0, 0, implicit $mode, implicit $exec
-    %441:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub1, 0, 0, implicit $mode, implicit $exec
-    %442:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub2, 0, 0, implicit $mode, implicit $exec
-    %443:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub3, 0, 0, implicit $mode, implicit $exec
+    %436:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %437:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %438:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %439:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %440:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %441:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %442:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %443:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %444.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %442, 0, %443, 0, 0, implicit $mode, implicit $exec
     %444.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %440, 0, %441, 0, 0, implicit $mode, implicit $exec
     %444.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %438, 0, %439, 0, 0, implicit $mode, implicit $exec
@@ -727,14 +727,14 @@ body:             |
     DS_WRITE2ST64_B32_gfx9 %262, %219.sub6, %219.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
     %446:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
     %447:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
-    %448:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub0, 0, 0, implicit $mode, implicit $exec
-    %449:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub1, 0, 0, implicit $mode, implicit $exec
-    %450:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub2, 0, 0, implicit $mode, implicit $exec
-    %451:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub3, 0, 0, implicit $mode, implicit $exec
-    %452:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub0, 0, 0, implicit $mode, implicit $exec
-    %453:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub1, 0, 0, implicit $mode, implicit $exec
-    %454:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub2, 0, 0, implicit $mode, implicit $exec
-    %455:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub3, 0, 0, implicit $mode, implicit $exec
+    %448:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %449:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %450:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %451:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub3, 0, 0, 0, implicit $mode, implicit $exec
+    %452:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub0, 0, 0, 0, implicit $mode, implicit $exec
+    %453:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub1, 0, 0, 0, implicit $mode, implicit $exec
+    %454:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub2, 0, 0, 0, implicit $mode, implicit $exec
+    %455:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub3, 0, 0, 0, implicit $mode, implicit $exec
     undef %456.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %454, 0, %455, 0, 0, implicit $mode, implicit $exec
     %456.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %452, 0, %453, 0, 0, implicit $mode, implicit $exec
     %456.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %450, 0, %451, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s
index 79cc12f366573..ae79fd2860ad7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s
@@ -432,6 +432,9 @@ v_cvt_f16_f32_e64 v5, null
 v_cvt_f16_f32_e64 v5, -1
 // GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
 
+v_cvt_f16_f32_e64 v5, v1 op_sel:[1]
+// GFX11: encoding: [0x05,0x08,0x8a,0xd5,0x01,0x01,0x00,0x00]
+
 v_cvt_f16_f32_e64 v5, 0.5 mul:2
 // GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
 
@@ -567,6 +570,9 @@ v_cvt_f32_f16_e64 v5, null
 v_cvt_f32_f16_e64 v5, -1
 // GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00]
 
+v_cvt_f32_f16_e64 v5, v1 op_sel:[1]
+// GFX11: encoding: [0x05,0x08,0x8b,0xd5,0x01,0x01,0x00,0x00]
+
 v_cvt_f32_f16_e64 v5, 0.5 mul:2
 // GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s
index 438ebe20e88b4..7c25962358e09 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s
@@ -570,6 +570,9 @@ v_cvt_f16_f32_e64 v5, null
 v_cvt_f16_f32_e64 v5, -1
 // GFX12: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
 
+v_cvt_f16_f32_e64 v5, v1 op_sel:[1]
+// GFX12: encoding: [0x05,0x08,0x8a,0xd5,0x01,0x01,0x00,0x00]
+
 v_cvt_f16_f32_e64 v5, 0.5 mul:2
 // GFX12: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
 
@@ -705,6 +708,9 @@ v_cvt_f32_f16_e64 v5, null
 v_cvt_f32_f16_e64 v5, -1
 // GFX12: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00]
 
+v_cvt_f32_f16_e64 v5, v1 op_sel:[1]
+// GFX12: encoding: [0x05,0x08,0x8b,0xd5,0x01,0x01,0x00,0x00]
+
 v_cvt_f32_f16_e64 v5, 0.5 mul:2
 // GFX12: encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08]
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index 282ff229c57e6..4f1b40f0989ff 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -448,7 +448,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -644,7 +644,7 @@
 
 0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index 5995762ce6ff1..228cb94582473 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -124,7 +124,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -200,7 +200,7 @@
 
 0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index d7e73909286a2..a1b2ac8477783 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -504,7 +504,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
 # GFX11-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
-# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
 
 0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00
 # GFX11-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l            ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00]
@@ -699,7 +699,7 @@
 
 0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00
 # GFX11-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255              ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
 
 0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00
 # GFX11: v_cvt_f32_f64_e64 v5, v[1:2]            ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
index 3ccf6feac4cca..8cc1d769b5307 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
@@ -556,7 +556,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
 # GFX12-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
-# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
 
 0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00
 # GFX12-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l            ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00]
@@ -751,7 +751,7 @@
 
 0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00
 # GFX12-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255              ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
 
 0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00
 # GFX12: v_cvt_f32_f64_e64 v5, v[1:2]            ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index a020b0ae46a37..8982c399c6af7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -478,7 +478,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -674,7 +674,7 @@
 
 0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
 # GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
 
 0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
 # GFX12: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index ad3c673b4e390..81565cae6ea04 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -154,7 +154,7 @@
 
 0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -230,7 +230,7 @@
 
 0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
 # GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
 
 0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
 # GFX12: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]



More information about the llvm-commits mailing list