[llvm] [AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and `v_cvt_f32_f16` (PR #139185)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Thu May 8 21:46:17 PDT 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/139185
>From 6bb5d6723ade28f8d4c0581e3cd3612b22552c27 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 9 May 2025 00:32:43 -0400
Subject: [PATCH] [AMDGPU][Fake16] Support OPSEL for `v_cvt_f16_f32` and
`v_cvt_f32_f16`
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 16 +-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 14 +-
.../inst-select-amdgcn.fcmp.constants.w32.mir | 8 +-
.../inst-select-amdgcn.fcmp.constants.w64.mir | 8 +-
.../AMDGPU/GlobalISel/inst-select-fptosi.mir | 12 +-
.../AMDGPU/GlobalISel/inst-select-fptoui.mir | 12 +-
.../AMDGPU/GlobalISel/inst-select-sitofp.mir | 4 +-
.../AMDGPU/GlobalISel/inst-select-uitofp.mir | 4 +-
.../AMDGPU/fix-sgpr-copies-f16-fake16.mir | 2 +-
...schedule-regpressure-ilp-metric-spills.mir | 256 +++++++++---------
.../AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s | 6 +
.../AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s | 6 +
.../gfx11_dasm_vop3_dpp16_from_vop1.txt | 4 +-
.../AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt | 4 +-
.../AMDGPU/gfx11_dasm_vop3_from_vop1.txt | 4 +-
.../AMDGPU/gfx12_dasm_vop3_from_vop1.txt | 4 +-
.../gfx12_dasm_vop3_from_vop1_dpp16.txt | 4 +-
.../AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt | 4 +-
18 files changed, 198 insertions(+), 174 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a90dace47fb2..4ec062916b29f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7658,11 +7658,17 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHRREV_B32_e64), TmpReg)
.addImm(16)
.add(Inst.getOperand(1));
- BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
- .addImm(0) // src0_modifiers
- .addReg(TmpReg)
- .addImm(0) // clamp
- .addImm(0); // omod
+ const MachineInstrBuilder &MIB =
+ BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
+ .addImm(0) // src0_modifiers
+ .addReg(TmpReg)
+ .addImm(0) // clamp
+ .addImm(0); // omod
+ // FIXME: this is a temporary workaround to support opsel for certain
+ // fake16 instructions. Need to remove this code after we have true16 for
+ // related instructions.
+ if (NewOpcode == AMDGPU::V_CVT_F32_F16_fake16_e64)
+ MIB.addImm(0); // op_sel0
}
MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 352a3f9c2d27f..5522d89855332 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -261,6 +261,11 @@ foreach vt = Reg32Types.types in {
>;
}
+let HasOpSel = 1 in {
+ def VOP_F16_F32_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F16_F32>;
+ def VOP_F32_F16_Fake16_OP_SEL : VOPProfile_Fake16<VOP_F32_F16>;
+} // End HasOpSel = 1
+
let isReMaterializable = 1 in {
let SchedRW = [WriteDoubleCvt] in {
// OMod clears exceptions when set in this instruction
@@ -301,15 +306,16 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
let OtherPredicates = [UseRealTrue16Insts] in
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
- let OtherPredicates = [UseFakeTrue16Insts] in
- defm V_CVT_F16_F32_fake16 : VOP1Inst <"v_cvt_f16_f32_fake16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
+ let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F16_F32_fake16
+ : VOP1Inst<"v_cvt_f16_f32_fake16", VOP_F16_F32_Fake16_OP_SEL,
+ any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0
let OtherPredicates = [NotHasTrue16BitInsts] in
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [UseRealTrue16Insts] in
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
-let OtherPredicates = [UseFakeTrue16Insts] in
- defm V_CVT_F32_F16_fake16 : VOP1Inst <"v_cvt_f32_f16_fake16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
+let OtherPredicates = [UseFakeTrue16Insts] in defm V_CVT_F32_F16_fake16
+ : VOP1Inst<"v_cvt_f32_f16_fake16", VOP_F32_F16_Fake16_OP_SEL, any_fpextend>;
let SubtargetPredicate = HasBF16ConversionInsts in
defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
index 49383135ab0c5..66c8d11bfcc9e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w32.mir
@@ -26,8 +26,8 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_32 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
index 828eb5d3fb40a..c7715eec76d37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fcmp.constants.w64.mir
@@ -26,8 +26,8 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CMP_F_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_F_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_F_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -62,8 +62,8 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CMP_TRU_F16_fake16_e64_:%[0-9]+]]:sreg_64 = V_CMP_TRU_F16_fake16_e64 0, [[V_CVT_F16_F32_fake16_e64_]], 0, [[V_CVT_F16_F32_fake16_e64_1]], 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CMP_TRU_F16_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
index 03cb907f82a16..2c7eb23dab364 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptosi.mir
@@ -149,7 +149,7 @@ body: |
; GFX11-FAKE16: liveins: $vgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -196,7 +196,7 @@ body: |
; GFX11-FAKE16: liveins: $sgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
@@ -251,7 +251,7 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -301,7 +301,7 @@ body: |
; GFX11-FAKE16: liveins: $vgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -350,7 +350,7 @@ body: |
; GFX11-FAKE16: liveins: $sgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
@@ -407,7 +407,7 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_I32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
index 521a0e8a2a796..489a6b360124f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fptoui.mir
@@ -99,7 +99,7 @@ body: |
; GFX11-FAKE16: liveins: $vgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -146,7 +146,7 @@ body: |
; GFX11-FAKE16: liveins: $sgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
@@ -201,7 +201,7 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -251,7 +251,7 @@ body: |
; GFX11-FAKE16: liveins: $vgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
@@ -300,7 +300,7 @@ body: |
; GFX11-FAKE16: liveins: $sgpr0
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
%0:sgpr(s32) = COPY $sgpr0
@@ -357,7 +357,7 @@ body: |
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GFX11-FAKE16-NEXT: [[V_XOR_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e64 [[S_MOV_B32_]], [[COPY]], implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_fake16_e64 0, [[V_XOR_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 [[V_CVT_F32_F16_fake16_e64_]], implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: S_ENDPGM 0, implicit [[V_CVT_U32_F32_e32_]]
%0:vgpr(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
index 3888ce87b46fd..72e8fc52917a6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sitofp.mir
@@ -101,7 +101,7 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_SITOFP %0
@@ -150,7 +150,7 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-FAKE16-NEXT: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_I32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s16) = G_SITOFP %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
index 35d622dc57d18..1e3c4c8e596b5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uitofp.mir
@@ -115,7 +115,7 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:vgpr(s32) = COPY $vgpr0
%1:vgpr(s16) = G_UITOFP %0
@@ -164,7 +164,7 @@ body: |
; GFX11-FAKE16-NEXT: {{ $}}
; GFX11-FAKE16-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX11-FAKE16-NEXT: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; GFX11-FAKE16-NEXT: [[V_CVT_F16_F32_fake16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F16_F32_fake16_e64 0, [[V_CVT_F32_U32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
; GFX11-FAKE16-NEXT: $vgpr0 = COPY [[V_CVT_F16_F32_fake16_e64_]]
%0:sgpr(s32) = COPY $sgpr0
%1:vgpr(s16) = G_UITOFP %0
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
index 5d90bab1384eb..0427b741d2456 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16-fake16.mir
@@ -33,7 +33,7 @@ body: |
; GCN-NEXT: [[V_CVT_F16_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F16_U16_fake16_e64 [[DEF]], 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[V_CVT_F16_U16_fake16_e64_]], implicit $exec
- ; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[V_CVT_F32_F16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_CVT_F32_F16_fake16_e64 0, [[V_LSHRREV_B32_e64_]], 0, 0, 0, implicit $mode, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = V_CVT_F16_U16_fake16_e64 %0:vgpr_32, 0, 0, implicit $mode, implicit $exec
%2:sreg_32 = COPY %1:vgpr_32
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
index aa0d1fe45e9a8..7ff3788096303 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-ilp-metric-spills.mir
@@ -424,14 +424,14 @@ body: |
%264:vgpr_32 = V_LSHL_OR_B32_e64 %254, 8, %263, implicit $exec
%265:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%266:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %267:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub0, 0, 0, implicit $mode, implicit $exec
- %268:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub1, 0, 0, implicit $mode, implicit $exec
- %269:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub2, 0, 0, implicit $mode, implicit $exec
- %270:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub3, 0, 0, implicit $mode, implicit $exec
- %271:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub0, 0, 0, implicit $mode, implicit $exec
- %272:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub1, 0, 0, implicit $mode, implicit $exec
- %273:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub2, 0, 0, implicit $mode, implicit $exec
- %274:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub3, 0, 0, implicit $mode, implicit $exec
+ %267:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %268:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %269:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %270:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %265.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %271:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %272:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %273:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %274:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %266.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %275.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %273, 0, %274, 0, 0, implicit $mode, implicit $exec
%275.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %271, 0, %272, 0, 0, implicit $mode, implicit $exec
%275.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %269, 0, %270, 0, 0, implicit $mode, implicit $exec
@@ -446,14 +446,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %212.sub6, %212.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%277:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%278:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %279:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub0, 0, 0, implicit $mode, implicit $exec
- %280:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub1, 0, 0, implicit $mode, implicit $exec
- %281:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub2, 0, 0, implicit $mode, implicit $exec
- %282:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub3, 0, 0, implicit $mode, implicit $exec
- %283:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub0, 0, 0, implicit $mode, implicit $exec
- %284:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub1, 0, 0, implicit $mode, implicit $exec
- %285:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub2, 0, 0, implicit $mode, implicit $exec
- %286:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub3, 0, 0, implicit $mode, implicit $exec
+ %279:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %280:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %281:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %282:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %277.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %283:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %284:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %285:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %286:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %278.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %287.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %285, 0, %286, 0, 0, implicit $mode, implicit $exec
%287.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %283, 0, %284, 0, 0, implicit $mode, implicit $exec
%287.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %281, 0, %282, 0, 0, implicit $mode, implicit $exec
@@ -465,14 +465,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %246.sub6, %246.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%288:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%289:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %290:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub0, 0, 0, implicit $mode, implicit $exec
- %291:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub1, 0, 0, implicit $mode, implicit $exec
- %292:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub2, 0, 0, implicit $mode, implicit $exec
- %293:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub3, 0, 0, implicit $mode, implicit $exec
- %294:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub0, 0, 0, implicit $mode, implicit $exec
- %295:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub1, 0, 0, implicit $mode, implicit $exec
- %296:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub2, 0, 0, implicit $mode, implicit $exec
- %297:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub3, 0, 0, implicit $mode, implicit $exec
+ %290:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %291:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %292:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %293:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %288.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %294:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %295:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %296:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %297:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %289.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %298.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %296, 0, %297, 0, 0, implicit $mode, implicit $exec
%298.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %294, 0, %295, 0, 0, implicit $mode, implicit $exec
%298.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %292, 0, %293, 0, 0, implicit $mode, implicit $exec
@@ -485,14 +485,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %250.sub6, %250.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%300:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%301:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %302:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub0, 0, 0, implicit $mode, implicit $exec
- %303:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub1, 0, 0, implicit $mode, implicit $exec
- %304:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub2, 0, 0, implicit $mode, implicit $exec
- %305:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub3, 0, 0, implicit $mode, implicit $exec
- %306:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub0, 0, 0, implicit $mode, implicit $exec
- %307:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub1, 0, 0, implicit $mode, implicit $exec
- %308:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub2, 0, 0, implicit $mode, implicit $exec
- %309:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub3, 0, 0, implicit $mode, implicit $exec
+ %302:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %303:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %304:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %305:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %300.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %306:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %307:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %308:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %309:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %301.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %310.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %308, 0, %309, 0, 0, implicit $mode, implicit $exec
%310.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %306, 0, %307, 0, 0, implicit $mode, implicit $exec
%310.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %304, 0, %305, 0, 0, implicit $mode, implicit $exec
@@ -506,14 +506,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %253.sub6, %253.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%313:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%314:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %315:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub0, 0, 0, implicit $mode, implicit $exec
- %316:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub1, 0, 0, implicit $mode, implicit $exec
- %317:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub2, 0, 0, implicit $mode, implicit $exec
- %318:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub3, 0, 0, implicit $mode, implicit $exec
- %319:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub0, 0, 0, implicit $mode, implicit $exec
- %320:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub1, 0, 0, implicit $mode, implicit $exec
- %321:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub2, 0, 0, implicit $mode, implicit $exec
- %322:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub3, 0, 0, implicit $mode, implicit $exec
+ %315:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %316:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %317:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %318:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %313.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %319:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %320:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %321:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %322:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %314.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %323.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %321, 0, %322, 0, 0, implicit $mode, implicit $exec
%323.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %319, 0, %320, 0, 0, implicit $mode, implicit $exec
%323.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %317, 0, %318, 0, 0, implicit $mode, implicit $exec
@@ -526,14 +526,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %214.sub6, %214.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%325:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%326:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %327:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub0, 0, 0, implicit $mode, implicit $exec
- %328:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub1, 0, 0, implicit $mode, implicit $exec
- %329:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub2, 0, 0, implicit $mode, implicit $exec
- %330:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub3, 0, 0, implicit $mode, implicit $exec
- %331:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub0, 0, 0, implicit $mode, implicit $exec
- %332:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub1, 0, 0, implicit $mode, implicit $exec
- %333:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub2, 0, 0, implicit $mode, implicit $exec
- %334:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub3, 0, 0, implicit $mode, implicit $exec
+ %327:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %328:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %329:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %330:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %325.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %331:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %332:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %333:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %334:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %326.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %335.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %333, 0, %334, 0, 0, implicit $mode, implicit $exec
%335.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %331, 0, %332, 0, 0, implicit $mode, implicit $exec
%335.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %329, 0, %330, 0, 0, implicit $mode, implicit $exec
@@ -546,14 +546,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %247.sub6, %247.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%337:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%338:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %339:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub0, 0, 0, implicit $mode, implicit $exec
- %340:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub1, 0, 0, implicit $mode, implicit $exec
- %341:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub2, 0, 0, implicit $mode, implicit $exec
- %342:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub3, 0, 0, implicit $mode, implicit $exec
- %343:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub0, 0, 0, implicit $mode, implicit $exec
- %344:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub1, 0, 0, implicit $mode, implicit $exec
- %345:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub2, 0, 0, implicit $mode, implicit $exec
- %346:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub3, 0, 0, implicit $mode, implicit $exec
+ %339:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %340:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %341:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %342:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %337.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %343:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %344:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %345:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %346:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %338.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %347.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %345, 0, %346, 0, 0, implicit $mode, implicit $exec
%347.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %343, 0, %344, 0, 0, implicit $mode, implicit $exec
%347.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %341, 0, %342, 0, 0, implicit $mode, implicit $exec
@@ -567,14 +567,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %213.sub6, %213.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%350:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%351:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %352:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub0, 0, 0, implicit $mode, implicit $exec
- %353:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub1, 0, 0, implicit $mode, implicit $exec
- %354:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub2, 0, 0, implicit $mode, implicit $exec
- %355:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub3, 0, 0, implicit $mode, implicit $exec
- %356:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub0, 0, 0, implicit $mode, implicit $exec
- %357:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub1, 0, 0, implicit $mode, implicit $exec
- %358:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub2, 0, 0, implicit $mode, implicit $exec
- %359:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub3, 0, 0, implicit $mode, implicit $exec
+ %352:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %353:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %354:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %355:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %350.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %356:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %357:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %358:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %359:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %351.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %360.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %358, 0, %359, 0, 0, implicit $mode, implicit $exec
%360.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %356, 0, %357, 0, 0, implicit $mode, implicit $exec
%360.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %354, 0, %355, 0, 0, implicit $mode, implicit $exec
@@ -588,14 +588,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %216.sub6, %216.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%363:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%364:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %365:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub0, 0, 0, implicit $mode, implicit $exec
- %366:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub1, 0, 0, implicit $mode, implicit $exec
- %367:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub2, 0, 0, implicit $mode, implicit $exec
- %368:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub3, 0, 0, implicit $mode, implicit $exec
- %369:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub0, 0, 0, implicit $mode, implicit $exec
- %370:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub1, 0, 0, implicit $mode, implicit $exec
- %371:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub2, 0, 0, implicit $mode, implicit $exec
- %372:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub3, 0, 0, implicit $mode, implicit $exec
+ %365:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %366:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %367:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %368:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %363.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %369:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %370:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %371:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %372:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %364.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %373.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %371, 0, %372, 0, 0, implicit $mode, implicit $exec
%373.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %369, 0, %370, 0, 0, implicit $mode, implicit $exec
%373.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %367, 0, %368, 0, 0, implicit $mode, implicit $exec
@@ -608,14 +608,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %248.sub6, %248.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%375:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%376:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %377:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub0, 0, 0, implicit $mode, implicit $exec
- %378:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub1, 0, 0, implicit $mode, implicit $exec
- %379:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub2, 0, 0, implicit $mode, implicit $exec
- %380:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub3, 0, 0, implicit $mode, implicit $exec
- %381:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub0, 0, 0, implicit $mode, implicit $exec
- %382:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub1, 0, 0, implicit $mode, implicit $exec
- %383:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub2, 0, 0, implicit $mode, implicit $exec
- %384:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub3, 0, 0, implicit $mode, implicit $exec
+ %377:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %378:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %379:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %380:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %375.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %381:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %382:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %383:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %384:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %376.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %385.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %383, 0, %384, 0, 0, implicit $mode, implicit $exec
%385.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %381, 0, %382, 0, 0, implicit $mode, implicit $exec
%385.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %379, 0, %380, 0, 0, implicit $mode, implicit $exec
@@ -627,14 +627,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %217.sub6, %217.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%386:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%387:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %388:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub0, 0, 0, implicit $mode, implicit $exec
- %389:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub1, 0, 0, implicit $mode, implicit $exec
- %390:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub2, 0, 0, implicit $mode, implicit $exec
- %391:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub3, 0, 0, implicit $mode, implicit $exec
- %392:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub0, 0, 0, implicit $mode, implicit $exec
- %393:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub1, 0, 0, implicit $mode, implicit $exec
- %394:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub2, 0, 0, implicit $mode, implicit $exec
- %395:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub3, 0, 0, implicit $mode, implicit $exec
+ %388:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %389:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %390:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %391:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %386.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %392:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %393:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %394:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %395:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %387.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %396.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %394, 0, %395, 0, 0, implicit $mode, implicit $exec
%396.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %392, 0, %393, 0, 0, implicit $mode, implicit $exec
%396.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %390, 0, %391, 0, 0, implicit $mode, implicit $exec
@@ -647,14 +647,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %251.sub6, %251.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%398:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%399:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %400:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub0, 0, 0, implicit $mode, implicit $exec
- %401:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub1, 0, 0, implicit $mode, implicit $exec
- %402:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub2, 0, 0, implicit $mode, implicit $exec
- %403:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub3, 0, 0, implicit $mode, implicit $exec
- %404:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub0, 0, 0, implicit $mode, implicit $exec
- %405:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub1, 0, 0, implicit $mode, implicit $exec
- %406:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub2, 0, 0, implicit $mode, implicit $exec
- %407:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub3, 0, 0, implicit $mode, implicit $exec
+ %400:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %401:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %402:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %403:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %398.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %404:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %405:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %406:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %407:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %399.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %408.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %406, 0, %407, 0, 0, implicit $mode, implicit $exec
%408.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %404, 0, %405, 0, 0, implicit $mode, implicit $exec
%408.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %402, 0, %403, 0, 0, implicit $mode, implicit $exec
@@ -667,14 +667,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %252.sub6, %252.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%410:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%411:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %412:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub0, 0, 0, implicit $mode, implicit $exec
- %413:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub1, 0, 0, implicit $mode, implicit $exec
- %414:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub2, 0, 0, implicit $mode, implicit $exec
- %415:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub3, 0, 0, implicit $mode, implicit $exec
- %416:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub0, 0, 0, implicit $mode, implicit $exec
- %417:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub1, 0, 0, implicit $mode, implicit $exec
- %418:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub2, 0, 0, implicit $mode, implicit $exec
- %419:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub3, 0, 0, implicit $mode, implicit $exec
+ %412:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %413:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %414:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %415:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %410.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %416:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %417:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %418:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %419:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %411.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %420.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %418, 0, %419, 0, 0, implicit $mode, implicit $exec
%420.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %416, 0, %417, 0, 0, implicit $mode, implicit $exec
%420.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %414, 0, %415, 0, 0, implicit $mode, implicit $exec
@@ -687,14 +687,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %220.sub6, %220.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%422:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%423:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %424:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub0, 0, 0, implicit $mode, implicit $exec
- %425:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub1, 0, 0, implicit $mode, implicit $exec
- %426:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub2, 0, 0, implicit $mode, implicit $exec
- %427:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub3, 0, 0, implicit $mode, implicit $exec
- %428:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub0, 0, 0, implicit $mode, implicit $exec
- %429:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub1, 0, 0, implicit $mode, implicit $exec
- %430:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub2, 0, 0, implicit $mode, implicit $exec
- %431:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub3, 0, 0, implicit $mode, implicit $exec
+ %424:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %425:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %426:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %427:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %422.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %428:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %429:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %430:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %431:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %423.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %432.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %430, 0, %431, 0, 0, implicit $mode, implicit $exec
%432.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %428, 0, %429, 0, 0, implicit $mode, implicit $exec
%432.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %426, 0, %427, 0, 0, implicit $mode, implicit $exec
@@ -707,14 +707,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %249.sub6, %249.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%434:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%435:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %436:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub0, 0, 0, implicit $mode, implicit $exec
- %437:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub1, 0, 0, implicit $mode, implicit $exec
- %438:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub2, 0, 0, implicit $mode, implicit $exec
- %439:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub3, 0, 0, implicit $mode, implicit $exec
- %440:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub0, 0, 0, implicit $mode, implicit $exec
- %441:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub1, 0, 0, implicit $mode, implicit $exec
- %442:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub2, 0, 0, implicit $mode, implicit $exec
- %443:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub3, 0, 0, implicit $mode, implicit $exec
+ %436:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %437:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %438:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %439:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %434.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %440:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %441:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %442:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %443:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %435.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %444.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %442, 0, %443, 0, 0, implicit $mode, implicit $exec
%444.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %440, 0, %441, 0, 0, implicit $mode, implicit $exec
%444.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %438, 0, %439, 0, 0, implicit $mode, implicit $exec
@@ -727,14 +727,14 @@ body: |
DS_WRITE2ST64_B32_gfx9 %262, %219.sub6, %219.sub7, 6, 7, 0, implicit $exec :: (store (s32), addrspace 3)
%446:vreg_128 = DS_READ_B128_gfx9 %264, 0, 0, implicit $exec :: (load (s128), addrspace 3)
%447:vreg_128 = DS_READ_B128_gfx9 %264, 16, 0, implicit $exec :: (load (s128), addrspace 3)
- %448:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub0, 0, 0, implicit $mode, implicit $exec
- %449:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub1, 0, 0, implicit $mode, implicit $exec
- %450:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub2, 0, 0, implicit $mode, implicit $exec
- %451:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub3, 0, 0, implicit $mode, implicit $exec
- %452:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub0, 0, 0, implicit $mode, implicit $exec
- %453:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub1, 0, 0, implicit $mode, implicit $exec
- %454:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub2, 0, 0, implicit $mode, implicit $exec
- %455:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub3, 0, 0, implicit $mode, implicit $exec
+ %448:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %449:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %450:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %451:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %446.sub3, 0, 0, 0, implicit $mode, implicit $exec
+ %452:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub0, 0, 0, 0, implicit $mode, implicit $exec
+ %453:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub1, 0, 0, 0, implicit $mode, implicit $exec
+ %454:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub2, 0, 0, 0, implicit $mode, implicit $exec
+ %455:vgpr_32 = V_CVT_F16_F32_fake16_e64 0, %447.sub3, 0, 0, 0, implicit $mode, implicit $exec
undef %456.sub3:vreg_128 = V_PACK_B32_F16_e64 0, %454, 0, %455, 0, 0, implicit $mode, implicit $exec
%456.sub2:vreg_128 = V_PACK_B32_F16_e64 0, %452, 0, %453, 0, 0, implicit $mode, implicit $exec
%456.sub1:vreg_128 = V_PACK_B32_F16_e64 0, %450, 0, %451, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s
index 79cc12f366573..ae79fd2860ad7 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vop1-fake16.s
@@ -432,6 +432,9 @@ v_cvt_f16_f32_e64 v5, null
v_cvt_f16_f32_e64 v5, -1
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
+v_cvt_f16_f32_e64 v5, v1 op_sel:[1]
+// GFX11: encoding: [0x05,0x08,0x8a,0xd5,0x01,0x01,0x00,0x00]
+
v_cvt_f16_f32_e64 v5, 0.5 mul:2
// GFX11: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
@@ -567,6 +570,9 @@ v_cvt_f32_f16_e64 v5, null
v_cvt_f32_f16_e64 v5, -1
// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00]
+v_cvt_f32_f16_e64 v5, v1 op_sel:[1]
+// GFX11: encoding: [0x05,0x08,0x8b,0xd5,0x01,0x01,0x00,0x00]
+
v_cvt_f32_f16_e64 v5, 0.5 mul:2
// GFX11: encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s
index 438ebe20e88b4..7c25962358e09 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop1-fake16.s
@@ -570,6 +570,9 @@ v_cvt_f16_f32_e64 v5, null
v_cvt_f16_f32_e64 v5, -1
// GFX12: encoding: [0x05,0x00,0x8a,0xd5,0xc1,0x00,0x00,0x00]
+v_cvt_f16_f32_e64 v5, v1 op_sel:[1]
+// GFX12: encoding: [0x05,0x08,0x8a,0xd5,0x01,0x01,0x00,0x00]
+
v_cvt_f16_f32_e64 v5, 0.5 mul:2
// GFX12: encoding: [0x05,0x00,0x8a,0xd5,0xf0,0x00,0x00,0x08]
@@ -705,6 +708,9 @@ v_cvt_f32_f16_e64 v5, null
v_cvt_f32_f16_e64 v5, -1
// GFX12: encoding: [0x05,0x00,0x8b,0xd5,0xc1,0x00,0x00,0x00]
+v_cvt_f32_f16_e64 v5, v1 op_sel:[1]
+// GFX12: encoding: [0x05,0x08,0x8b,0xd5,0x01,0x01,0x00,0x00]
+
v_cvt_f32_f16_e64 v5, 0.5 mul:2
// GFX12: encoding: [0x05,0x00,0x8b,0xd5,0xf0,0x00,0x00,0x08]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
index 282ff229c57e6..4f1b40f0989ff 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16_from_vop1.txt
@@ -448,7 +448,7 @@
0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
# GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -644,7 +644,7 @@
0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
index 5995762ce6ff1..228cb94582473 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8_from_vop1.txt
@@ -124,7 +124,7 @@
0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX11-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -200,7 +200,7 @@
0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX11-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX11: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
index d7e73909286a2..a1b2ac8477783 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_from_vop1.txt
@@ -504,7 +504,7 @@
0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
# GFX11-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
-# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX11-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00
# GFX11-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00]
@@ -699,7 +699,7 @@
0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00
# GFX11-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
-# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX11-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00
# GFX11: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
index 3ccf6feac4cca..8cc1d769b5307 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1.txt
@@ -556,7 +556,7 @@
0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf
# GFX12-REAL16: v_cvt_f16_f32_e64 v255.h, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
-# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+# GFX12-FAKE16: v_cvt_f16_f32_e64 v255, -|0xaf123456| op_sel:[0,1] clamp div:2 ; encoding: [0xff,0xc1,0x8a,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00
# GFX12-REAL16: v_cvt_f16_i16_e64 v5.l, v1.l ; encoding: [0x05,0x00,0xd1,0xd5,0x01,0x01,0x00,0x00]
@@ -751,7 +751,7 @@
0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00
# GFX12-REAL16: v_cvt_f32_f16_e64 v5, v255.h op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255 ; encoding: [0x05,0x00,0x8b,0xd5,0xff,0x01,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f32_f16_e64 v5, v255 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd5,0xff,0x01,0x00,0x00]
0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00
# GFX12: v_cvt_f32_f64_e64 v5, v[1:2] ; encoding: [0x05,0x00,0x8f,0xd5,0x01,0x01,0x00,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
index a020b0ae46a37..8982c399c6af7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp16.txt
@@ -478,7 +478,7 @@
0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
# GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
# GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xd1,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
@@ -674,7 +674,7 @@
0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30
# GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
-# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
+# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xfa,0x00,0x00,0x38,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff
# GFX12: v_cvt_f32_i32_e64_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x85,0xd5,0xfa,0x00,0x00,0x00,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
index ad3c673b4e390..81565cae6ea04 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop1_dpp8.txt
@@ -154,7 +154,7 @@
0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX12-REAL16: v_cvt_f16_f32_e64_dpp v255.h, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f16_f32_e64_dpp v255, -|v255| op_sel:[0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc1,0x8a,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX12-REAL16: v_cvt_f16_i16_e64_dpp v5.l, v1.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0xd1,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
@@ -230,7 +230,7 @@
0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00
# GFX12-REAL16: v_cvt_f32_f16_e64_dpp v255, -|v255.h| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
-# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
+# GFX12-FAKE16: v_cvt_f32_f16_e64_dpp v255, -|v255| op_sel:[1,0] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x89,0x8b,0xd5,0xea,0x00,0x00,0x38,0xff,0x00,0x00,0x00]
0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05
# GFX12: v_cvt_f32_i32_e64_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x85,0xd5,0xe9,0x00,0x00,0x00,0x01,0x77,0x39,0x05]
More information about the llvm-commits
mailing list