[llvm] [AMDGPU][GlobalISel] Add register bank legalization rules for fptoi and itofp (PR #176300)

Syadus Sefat via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 3 07:05:46 PST 2026


https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/176300

>From 18c9d0caae02b7612aa8c2471c9bd3d10dca1040 Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Thu, 15 Jan 2026 20:17:13 -0600
Subject: [PATCH 1/2] [AMDGPU][GlobalISel] Add register bank legalization rules
 for fptoi and itofp

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |   33 +-
 .../AMDGPU/GlobalISel/fp-int-conversions.ll   | 1201 +++++++++++++++++
 .../GlobalISel/regbankselect-fptosi.mir       |    3 +-
 .../GlobalISel/regbankselect-fptoui.mir       |    3 +-
 .../GlobalISel/regbankselect-sitofp.mir       |    3 +-
 .../GlobalISel/regbankselect-uitofp.mir       |    3 +-
 6 files changed, 1234 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 82e937cdc4ed3..bc7bb4a994194 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1182,14 +1182,37 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
       .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
 
-  addRulesForGOpcs({G_FPTOUI})
+  addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
+      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
+      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
+      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}})
+      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
+      .Any({{UniS16, S64}, {{UniInVgprS16}, {Vgpr64}}})
+      .Any({{DivS16, S64}, {{Vgpr16}, {Vgpr64}}})
+      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
+      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
+      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
       .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
-      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
-
-  addRulesForGOpcs({G_UITOFP})
+      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
       .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
+      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
+      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
+      .Any({{UniS64, S16}, {{UniInVgprS64}, {Vgpr16}}})
+      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr16}}});
+
+  addRulesForGOpcs({G_UITOFP, G_SITOFP})
+      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
+      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
+      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
+      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
+      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
       .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
-      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
+      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
+      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
+      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
+      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
+      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
+      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});
 
   addRulesForGOpcs({G_FPEXT})
       .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
new file mode 100644
index 0000000000000..3134ec956e685
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
@@ -0,0 +1,1201 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX10
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX11
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX12
+
+define amdgpu_ps void @s_fptoui_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f16_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u16_f16_e32 v2, s0
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f16_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u16_f16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f16_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_u16_f16_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui half %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptosi_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f16_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i16_f16_e32 v2, s0
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f16_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i16_f16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f16_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_i16_f16_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi half %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptoui_f16_to_i16(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f16_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u16_f16_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f16_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u16_f16_e32 v0.l, v0.l
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f16_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_u16_f16_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui half %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptosi_f16_to_i16(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f16_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i16_f16_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f16_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i16_f16_e32 v0.l, v0.l
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f16_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_i16_f16_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi half %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptoui_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f32_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, s0
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f32_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f32_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui float %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptosi_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f32_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, s0
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f32_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f32_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi float %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptoui_f32_to_i16(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f32_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f32_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f32_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui float %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptosi_f32_to_i16(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f32_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f32_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f32_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi float %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptoui_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f64_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f64_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f64_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui double %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptosi_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f64_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f64_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f64_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi double %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptoui_f64_to_i16(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f64_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f64_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f64_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX12-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui double %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptosi_f64_to_i16(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f64_to_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    global_store_short v[2:3], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f64_to_i16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f64_to_i16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX12-NEXT:    global_store_b16 v[2:3], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi double %x to i16
+  store i16 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptoui_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f16_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f16_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f16_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui half %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptosi_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f16_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f16_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f16_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi half %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptoui_f16_to_i32(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f16_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f16_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f16_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui half %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptosi_f16_to_i32(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f16_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f16_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f16_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi half %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptoui_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f32_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, s0
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f32_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, s0
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f32_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui float %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptosi_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f32_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, s0
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f32_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, s0
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f32_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi float %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptoui_f32_to_i32(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f32_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f32_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f32_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui float %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptosi_f32_to_i32(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f32_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f32_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f32_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi float %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptoui_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f64_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f64_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f64_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui double %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptosi_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f64_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f64_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f64_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi double %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptoui_f64_to_i32(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f64_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f64_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f64_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui double %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptosi_f64_to_i32(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f64_to_i32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT:    global_store_dword v[2:3], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f64_to_i32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f64_to_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi double %x to i32
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptoui_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f16_to_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT:    s_mov_b32 s1, 0
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX10-NEXT:    v_mov_b32_e32 v3, s1
+; GFX10-NEXT:    v_mov_b32_e32 v2, s0
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f16_to_i64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT:    s_mov_b32 s1, 0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f16_to_i64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
+; GFX12-NEXT:    s_mov_b32 s1, 0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
+; GFX12-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui half %x to i64
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_fptosi_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f16_to_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
+; GFX10-NEXT:    v_mov_b32_e32 v3, s1
+; GFX10-NEXT:    v_mov_b32_e32 v2, s0
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f16_to_i64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, v2
+; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
+; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f16_to_i64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
+; GFX12-NEXT:    s_ashr_i32 s1, s0, 31
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
+; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi half %x to i64
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptoui_f16_to_i64(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f16_to_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v0
+; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f16_to_i64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v0
+; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f16_to_i64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_mov_b32_e32 v1, 0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT:    s_endpgm
+  %result = fptoui half %x to i64
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_fptosi_f16_to_i64(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f16_to_i64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT:    v_cvt_i32_f32_e32 v3, v0
+; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
+; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f16_to_i64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_i32_f32_e32 v3, v0
+; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
+; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f16_to_i64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT:    s_endpgm
+  %result = fptosi half %x to i64
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_uitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i16_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f16_u16_e32 v2, s0
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i16_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f16_u16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i16_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    v_cvt_f16_u16_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i16 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_sitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i16_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f16_i16_e32 v2, s0
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i16_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f16_i16_e32 v2.l, s0
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i16_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    v_cvt_f16_i16_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i16 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_uitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i16_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f16_u16_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i16_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f16_u16_e32 v0.l, v0.l
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i16_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    v_cvt_f16_u16_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i16 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_sitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i16_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f16_i16_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i16_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f16_i16_e32 v0.l, v0.l
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i16_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    v_cvt_f16_i16_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i16 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_uitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i32_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s0
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i32_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_u32_e32 v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i32_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_u32 s0, s0
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT:    s_cvt_f16_f32 s0, s0
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i32 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_sitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i32_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_i32_e32 v2, s0
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; GFX10-NEXT:    global_store_short v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i32_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_i32_e32 v2, s0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i32_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_i32 s0, s0
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT:    s_cvt_f16_f32 s0, s0
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i32 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_uitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i32_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i32_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i32_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i32 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_sitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i32_to_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT:    global_store_short v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i32_to_f16:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i32_to_f16:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i32 %x to half
+  store half %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_uitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i32_to_f32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s0
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i32_to_f32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_u32_e32 v2, s0
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i32_to_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_u32 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i32 %x to float
+  store float %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_sitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i32_to_f32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_i32_e32 v2, s0
+; GFX10-NEXT:    global_store_dword v[0:1], v2, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i32_to_f32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_i32_e32 v2, s0
+; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i32_to_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    s_cvt_f32_i32 s0, s0
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v2, s0
+; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i32 %x to float
+  store float %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_uitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i32_to_f32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i32_to_f32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i32_to_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_f32_u32_e32 v0, v0
+; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i32 %x to float
+  store float %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_sitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i32_to_f32:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX10-NEXT:    global_store_dword v[1:2], v0, off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i32_to_f32:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i32_to_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i32 %x to float
+  store float %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_uitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i32_to_f64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f64_u32_e32 v[2:3], s0
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i32_to_f64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f64_u32_e32 v[2:3], s0
+; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i32_to_f64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f64_u32_e32 v[2:3], s0
+; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i32 %x to double
+  store double %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @s_sitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i32_to_f64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f64_i32_e32 v[2:3], s0
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i32_to_f64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f64_i32_e32 v[2:3], s0
+; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i32_to_f64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_cvt_f64_i32_e32 v[2:3], s0
+; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i32 %x to double
+  store double %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_uitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i32_to_f64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
+; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i32_to_f64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
+; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i32_to_f64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_f64_u32_e32 v[0:1], v0
+; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT:    s_endpgm
+  %result = uitofp i32 %x to double
+  store double %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_ps void @v_sitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i32_to_f64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_cvt_f64_i32_e32 v[3:4], v0
+; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i32_to_f64:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_cvt_f64_i32_e32 v[3:4], v0
+; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i32_to_f64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
+; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT:    s_endpgm
+  %result = sitofp i32 %x to double
+  store double %result, ptr addrspace(1) %out
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir
index c690f8439098f..f77a686d4e5c3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
 
 ---
 name: fptosi_s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir
index 17e656340f780..86cf60d92b6c6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
 
 ---
 name: fptoui_s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir
index 66e0d3db24112..4252e50ac064e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir
@@ -1,6 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
 
 ---
 name: sitofp_s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir
index e95be13c47d3b..3e46839532917 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
 
 ---
 name: uitofp_s
@@ -14,6 +14,7 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
     ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY1]](s32)
+    ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UITOFP]]
     %0:_(s32) = COPY $sgpr0
     %1:_(s32) = G_UITOFP %0
 ...

>From acb507c15cfa459e3e87d44b77f30de91433791b Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Thu, 29 Jan 2026 13:39:49 -0600
Subject: [PATCH 2/2]  [AMDGPU][GlobalIsel] Add register bank legalization
 rules for fptoi and itofp    -- updated test

---
 .../AMDGPU/AMDGPURegBankLegalizeRules.cpp     |   10 +-
 .../AMDGPU/GlobalISel/fp-int-conversions.ll   | 1189 +++--------------
 2 files changed, 207 insertions(+), 992 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index bc7bb4a994194..5d653116970f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1185,10 +1185,6 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
   addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
       .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
       .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
-      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}})
-      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
-      .Any({{UniS16, S64}, {{UniInVgprS16}, {Vgpr64}}})
-      .Any({{DivS16, S64}, {{Vgpr16}, {Vgpr64}}})
       .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
       .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
       .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
@@ -1196,9 +1192,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
       .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
       .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
-      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
-      .Any({{UniS64, S16}, {{UniInVgprS64}, {Vgpr16}}})
-      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr16}}});
+      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});
 
   addRulesForGOpcs({G_UITOFP, G_SITOFP})
       .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
@@ -1209,8 +1203,6 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
       .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
       .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
-      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
-      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
       .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
       .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
index 3134ec956e685..317778228b396 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
@@ -1,1201 +1,424 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX10
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX11
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX12
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_wait" --filter-out "s_delay_alu" --filter-out "endpgm" --filter-out "store" --version 6
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,FAKE16,PREGFX12,PREGFX12-FAKE16
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,PREGFX12,TRUE16
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,FAKE16,PREGFX12,PREGFX12-FAKE16
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,FAKE16,GFX12
 
 define amdgpu_ps void @s_fptoui_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f16_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u16_f16_e32 v2, s0
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
+; FAKE16-LABEL: s_fptoui_f16_to_i16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_u16_f16_e32 v2, s0
 ;
-; GFX11-LABEL: s_fptoui_f16_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u16_f16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f16_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_u16_f16_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: s_fptoui_f16_to_i16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_u16_f16_e32 v2.l, s0
   %result = fptoui half %x to i16
   store i16 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_fptosi_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f16_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i16_f16_e32 v2, s0
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f16_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i16_f16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; FAKE16-LABEL: s_fptosi_f16_to_i16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_i16_f16_e32 v2, s0
 ;
-; GFX12-LABEL: s_fptosi_f16_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_i16_f16_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: s_fptosi_f16_to_i16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_i16_f16_e32 v2.l, s0
   %result = fptosi half %x to i16
   store i16 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptoui_f16_to_i16(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f16_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u16_f16_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
+; FAKE16-LABEL: v_fptoui_f16_to_i16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_u16_f16_e32 v0, v0
 ;
-; GFX11-LABEL: v_fptoui_f16_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u16_f16_e32 v0.l, v0.l
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f16_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_u16_f16_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: v_fptoui_f16_to_i16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_u16_f16_e32 v0.l, v0.l
   %result = fptoui half %x to i16
   store i16 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptosi_f16_to_i16(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f16_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i16_f16_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f16_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i16_f16_e32 v0.l, v0.l
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
+; FAKE16-LABEL: v_fptosi_f16_to_i16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_i16_f16_e32 v0, v0
 ;
-; GFX12-LABEL: v_fptosi_f16_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_i16_f16_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: v_fptosi_f16_to_i16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_i16_f16_e32 v0.l, v0.l
   %result = fptosi half %x to i16
   store i16 %result, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_ps void @s_fptoui_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f32_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, s0
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f32_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f32_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptoui float %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @s_fptosi_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f32_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, s0
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f32_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f32_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptosi float %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @v_fptoui_f32_to_i16(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f32_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f32_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f32_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptoui float %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @v_fptosi_f32_to_i16(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f32_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f32_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f32_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptosi float %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @s_fptoui_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f64_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f64_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f64_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptoui double %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @s_fptosi_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f64_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f64_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f64_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptosi double %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @v_fptoui_f64_to_i16(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f64_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    global_store_short v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f64_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    global_store_b16 v[2:3], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f64_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX12-NEXT:    global_store_b16 v[2:3], v0, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptoui double %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @v_fptosi_f64_to_i16(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f64_to_i16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    global_store_short v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f64_to_i16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    global_store_b16 v[2:3], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f64_to_i16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX12-NEXT:    global_store_b16 v[2:3], v0, off
-; GFX12-NEXT:    s_endpgm
-  %result = fptosi double %x to i16
-  store i16 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_ps void @s_fptoui_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f16_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f16_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; PREGFX12-LABEL: s_fptoui_f16_to_i32:
+; PREGFX12:  ; %bb.0:
+; PREGFX12:    v_cvt_f32_f16_e32 v2, s0
+; PREGFX12:    v_cvt_u32_f32_e32 v2, v2
 ;
 ; GFX12-LABEL: s_fptoui_f16_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_f32_f16 s0, s0
+; GFX12:    s_cvt_u32_f32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = fptoui half %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_fptosi_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f16_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f16_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; PREGFX12-LABEL: s_fptosi_f16_to_i32:
+; PREGFX12:  ; %bb.0:
+; PREGFX12:    v_cvt_f32_f16_e32 v2, s0
+; PREGFX12:    v_cvt_i32_f32_e32 v2, v2
 ;
 ; GFX12-LABEL: s_fptosi_f16_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_f32_f16 s0, s0
+; GFX12:    s_cvt_i32_f32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = fptosi half %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptoui_f16_to_i32(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f16_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f16_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f16_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; FAKE16-LABEL: v_fptoui_f16_to_i32:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f32_f16_e32 v0, v0
+; FAKE16:    v_cvt_u32_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_fptoui_f16_to_i32:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f32_f16_e32 v0, v0.l
+; TRUE16:    v_cvt_u32_f32_e32 v0, v0
   %result = fptoui half %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptosi_f16_to_i32(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f16_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f16_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f16_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; FAKE16-LABEL: v_fptosi_f16_to_i32:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f32_f16_e32 v0, v0
+; FAKE16:    v_cvt_i32_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_fptosi_f16_to_i32:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f32_f16_e32 v0, v0.l
+; TRUE16:    v_cvt_i32_f32_e32 v0, v0
   %result = fptosi half %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_fptoui_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f32_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, s0
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f32_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, s0
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; PREGFX12-LABEL: s_fptoui_f32_to_i32:
+; PREGFX12:  ; %bb.0:
+; PREGFX12:    v_cvt_u32_f32_e32 v2, s0
 ;
 ; GFX12-LABEL: s_fptoui_f32_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_u32_f32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = fptoui float %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_fptosi_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f32_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, s0
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f32_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, s0
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; PREGFX12-LABEL: s_fptosi_f32_to_i32:
+; PREGFX12:  ; %bb.0:
+; PREGFX12:    v_cvt_i32_f32_e32 v2, s0
 ;
 ; GFX12-LABEL: s_fptosi_f32_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_i32_f32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = fptosi float %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptoui_f32_to_i32(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f32_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f32_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f32_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_fptoui_f32_to_i32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_u32_f32_e32 v0, v0
   %result = fptoui float %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptosi_f32_to_i32(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f32_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f32_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f32_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_fptosi_f32_to_i32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_i32_f32_e32 v0, v0
   %result = fptosi float %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_fptoui_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f64_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f64_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f64_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: s_fptoui_f64_to_i32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_u32_f64_e32 v2, s[0:1]
   %result = fptoui double %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_fptosi_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f64_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f64_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f64_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: s_fptosi_f64_to_i32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_i32_f64_e32 v2, s[0:1]
   %result = fptosi double %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptoui_f64_to_i32(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f64_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    global_store_dword v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f64_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f64_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_fptoui_f64_to_i32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_u32_f64_e32 v0, v[0:1]
   %result = fptoui double %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_fptosi_f64_to_i32(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f64_to_i32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX10-NEXT:    global_store_dword v[2:3], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f64_to_i32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX11-NEXT:    global_store_b32 v[2:3], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f64_to_i32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX12-NEXT:    global_store_b32 v[2:3], v0, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_fptosi_f64_to_i32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_i32_f64_e32 v0, v[0:1]
   %result = fptosi double %x to i32
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_ps void @s_fptoui_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f16_to_i64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT:    s_mov_b32 s1, 0
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX10-NEXT:    v_mov_b32_e32 v3, s1
-; GFX10-NEXT:    v_mov_b32_e32 v2, s0
-; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f16_to_i64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT:    s_mov_b32 s1, 0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f16_to_i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
-; GFX12-NEXT:    s_mov_b32 s1, 0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT:    s_cvt_u32_f32 s0, s0
-; GFX12-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
-; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT:    s_endpgm
-  %result = fptoui half %x to i64
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @s_fptosi_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f16_to_i64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX10-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX10-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX10-NEXT:    v_mov_b32_e32 v3, s1
-; GFX10-NEXT:    v_mov_b32_e32 v2, s0
-; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f16_to_i64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX11-NEXT:    v_readfirstlane_b32 s0, v2
-; GFX11-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT:    v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f16_to_i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_f16 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT:    s_cvt_i32_f32 s0, s0
-; GFX12-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
-; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT:    s_endpgm
-  %result = fptosi half %x to i64
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @v_fptoui_f16_to_i64(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f16_to_i64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT:    v_mov_b32_e32 v4, 0
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v0
-; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f16_to_i64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT:    v_mov_b32_e32 v4, 0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT:    v_cvt_u32_f32_e32 v3, v0
-; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f16_to_i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_mov_b32_e32 v1, 0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3)
-; GFX12-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT:    s_endpgm
-  %result = fptoui half %x to i64
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
-define amdgpu_ps void @v_fptosi_f16_to_i64(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f16_to_i64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT:    v_cvt_i32_f32_e32 v3, v0
-; GFX10-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f16_to_i64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_i32_f32_e32 v3, v0
-; GFX11-NEXT:    v_ashrrev_i32_e32 v4, 31, v3
-; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f16_to_i64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
-; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT:    s_endpgm
-  %result = fptosi half %x to i64
-  store i64 %result, ptr addrspace(1) %out
-  ret void
-}
-
 define amdgpu_ps void @s_uitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i16_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f16_u16_e32 v2, s0
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
+; FAKE16-LABEL: s_uitofp_i16_to_f16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f16_u16_e32 v2, s0
 ;
-; GFX11-LABEL: s_uitofp_i16_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f16_u16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_uitofp_i16_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    v_cvt_f16_u16_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: s_uitofp_i16_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f16_u16_e32 v2.l, s0
   %result = uitofp i16 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_sitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i16_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f16_i16_e32 v2, s0
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_sitofp_i16_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f16_i16_e32 v2.l, s0
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; FAKE16-LABEL: s_sitofp_i16_to_f16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f16_i16_e32 v2, s0
 ;
-; GFX12-LABEL: s_sitofp_i16_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    v_cvt_f16_i16_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: s_sitofp_i16_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f16_i16_e32 v2.l, s0
   %result = sitofp i16 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_uitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i16_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f16_u16_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
+; FAKE16-LABEL: v_uitofp_i16_to_f16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f16_u16_e32 v0, v0
 ;
-; GFX11-LABEL: v_uitofp_i16_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f16_u16_e32 v0.l, v0.l
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i16_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    v_cvt_f16_u16_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: v_uitofp_i16_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f16_u16_e32 v0.l, v0.l
   %result = uitofp i16 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_sitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i16_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f16_i16_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i16_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f16_i16_e32 v0.l, v0.l
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
+; FAKE16-LABEL: v_sitofp_i16_to_f16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f16_i16_e32 v0, v0
 ;
-; GFX12-LABEL: v_sitofp_i16_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    v_cvt_f16_i16_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; TRUE16-LABEL: v_sitofp_i16_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f16_i16_e32 v0.l, v0.l
   %result = sitofp i16 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_uitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i32_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s0
-; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
+; PREGFX12-FAKE16-LABEL: s_uitofp_i32_to_f16:
+; PREGFX12-FAKE16:  ; %bb.0:
+; PREGFX12-FAKE16:    v_cvt_f32_u32_e32 v2, s0
+; PREGFX12-FAKE16:    v_cvt_f16_f32_e32 v2, v2
 ;
-; GFX11-LABEL: s_uitofp_i32_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_u32_e32 v2, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; TRUE16-LABEL: s_uitofp_i32_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f32_u32_e32 v2, s0
+; TRUE16:    v_cvt_f16_f32_e32 v2.l, v2
 ;
 ; GFX12-LABEL: s_uitofp_i32_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_u32 s0, s0
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT:    s_cvt_f16_f32 s0, s0
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_f32_u32 s0, s0
+; GFX12:    s_cvt_f16_f32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = uitofp i32 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_sitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i32_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_i32_e32 v2, s0
-; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
-; GFX10-NEXT:    global_store_short v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
+; PREGFX12-FAKE16-LABEL: s_sitofp_i32_to_f16:
+; PREGFX12-FAKE16:  ; %bb.0:
+; PREGFX12-FAKE16:    v_cvt_f32_i32_e32 v2, s0
+; PREGFX12-FAKE16:    v_cvt_f16_f32_e32 v2, v2
 ;
-; GFX11-LABEL: s_sitofp_i32_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_i32_e32 v2, s0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; TRUE16-LABEL: s_sitofp_i32_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f32_i32_e32 v2, s0
+; TRUE16:    v_cvt_f16_f32_e32 v2.l, v2
 ;
 ; GFX12-LABEL: s_sitofp_i32_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_i32 s0, s0
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT:    s_cvt_f16_f32 s0, s0
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b16 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_f32_i32 s0, s0
+; GFX12:    s_cvt_f16_f32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = sitofp i32 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_uitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i32_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_uitofp_i32_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i32_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; FAKE16-LABEL: v_uitofp_i32_to_f16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f32_u32_e32 v0, v0
+; FAKE16:    v_cvt_f16_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_uitofp_i32_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f32_u32_e32 v0, v0
+; TRUE16:    v_cvt_f16_f32_e32 v0.l, v0
   %result = uitofp i32 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_sitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i32_to_f16:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT:    global_store_short v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i32_to_f16:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-NEXT:    global_store_b16 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_sitofp_i32_to_f16:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX12-NEXT:    global_store_b16 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; FAKE16-LABEL: v_sitofp_i32_to_f16:
+; FAKE16:  ; %bb.0:
+; FAKE16:    v_cvt_f32_i32_e32 v0, v0
+; FAKE16:    v_cvt_f16_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_sitofp_i32_to_f16:
+; TRUE16:  ; %bb.0:
+; TRUE16:    v_cvt_f32_i32_e32 v0, v0
+; TRUE16:    v_cvt_f16_f32_e32 v0.l, v0
   %result = sitofp i32 %x to half
   store half %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_uitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i32_to_f32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s0
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_uitofp_i32_to_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_u32_e32 v2, s0
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; PREGFX12-LABEL: s_uitofp_i32_to_f32:
+; PREGFX12:  ; %bb.0:
+; PREGFX12:    v_cvt_f32_u32_e32 v2, s0
 ;
 ; GFX12-LABEL: s_uitofp_i32_to_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_u32 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_f32_u32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = uitofp i32 %x to float
   store float %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_sitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i32_to_f32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_i32_e32 v2, s0
-; GFX10-NEXT:    global_store_dword v[0:1], v2, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_sitofp_i32_to_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_i32_e32 v2, s0
-; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX11-NEXT:    s_endpgm
+; PREGFX12-LABEL: s_sitofp_i32_to_f32:
+; PREGFX12:  ; %bb.0:
+; PREGFX12:    v_cvt_f32_i32_e32 v2, s0
 ;
 ; GFX12-LABEL: s_sitofp_i32_to_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    s_cvt_f32_i32 s0, s0
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT:    v_mov_b32_e32 v2, s0
-; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
-; GFX12-NEXT:    s_endpgm
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_f32_i32 s0, s0
+; GFX12:    v_mov_b32_e32 v2, s0
   %result = sitofp i32 %x to float
   store float %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_uitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i32_to_f32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_uitofp_i32_to_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i32_to_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_f32_u32_e32 v0, v0
-; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_uitofp_i32_to_f32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_f32_u32_e32 v0, v0
   %result = uitofp i32 %x to float
   store float %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_sitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i32_to_f32:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX10-NEXT:    global_store_dword v[1:2], v0, off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i32_to_f32:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_sitofp_i32_to_f32:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; GFX12-NEXT:    global_store_b32 v[4:5], v0, off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_sitofp_i32_to_f32:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_f32_i32_e32 v0, v0
   %result = sitofp i32 %x to float
   store float %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_uitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i32_to_f64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f64_u32_e32 v[2:3], s0
-; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_uitofp_i32_to_f64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f64_u32_e32 v[2:3], s0
-; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_uitofp_i32_to_f64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f64_u32_e32 v[2:3], s0
-; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: s_uitofp_i32_to_f64:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_f64_u32_e32 v[2:3], s0
   %result = uitofp i32 %x to double
   store double %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @s_sitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i32_to_f64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f64_i32_e32 v[2:3], s0
-; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: s_sitofp_i32_to_f64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f64_i32_e32 v[2:3], s0
-; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: s_sitofp_i32_to_f64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_cvt_f64_i32_e32 v[2:3], s0
-; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: s_sitofp_i32_to_f64:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_f64_i32_e32 v[2:3], s0
   %result = sitofp i32 %x to double
   store double %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_uitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i32_to_f64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
-; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_uitofp_i32_to_f64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f64_u32_e32 v[3:4], v0
-; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i32_to_f64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_f64_u32_e32 v[0:1], v0
-; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_uitofp_i32_to_f64:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_f64_u32_e32 v[3:4], v0
   %result = uitofp i32 %x to double
   store double %result, ptr addrspace(1) %out
   ret void
 }
 
 define amdgpu_ps void @v_sitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i32_to_f64:
-; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_cvt_f64_i32_e32 v[3:4], v0
-; GFX10-NEXT:    global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT:    s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i32_to_f64:
-; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_cvt_f64_i32_e32 v[3:4], v0
-; GFX11-NEXT:    global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT:    s_endpgm
-;
-; GFX12-LABEL: v_sitofp_i32_to_f64:
-; GFX12:       ; %bb.0:
-; GFX12-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT:    v_cvt_f64_i32_e32 v[0:1], v0
-; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT:    s_endpgm
+; GCN-LABEL: v_sitofp_i32_to_f64:
+; GCN:  ; %bb.0:
+; GCN:    v_cvt_f64_i32_e32 v[3:4], v0
   %result = sitofp i32 %x to double
   store double %result, ptr addrspace(1) %out
   ret void
 }
+
+define amdgpu_ps float @fpext_hif16_to_32(<2 x half> inreg %val) {
+; PREGFX12-LABEL: fpext_hif16_to_32:
+; PREGFX12:  ; %bb.0:
+; PREGFX12:    s_lshr_b32 s0, s0, 16
+; PREGFX12:    v_cvt_f32_f16_e32 v0, s0
+; PREGFX12:    ; return to shader part epilog
+;
+; GFX12-LABEL: fpext_hif16_to_32:
+; GFX12:  ; %bb.0:
+; GFX12:    s_cvt_hi_f32_f16 s0, s0
+; GFX12:    v_mov_b32_e32 v0, s0
+; GFX12:    ; return to shader part epilog
+  %hielt = extractelement <2 x half> %val, i32 1
+  %res = fpext half %hielt to float
+  ret float %res
+}



More information about the llvm-commits mailing list