[llvm] [AMDGPU][GlobalIsel] Add register bank legalization rules for fptoi and itofp (PR #176300)
Syadus Sefat via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 3 07:05:46 PST 2026
https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/176300
>From 18c9d0caae02b7612aa8c2471c9bd3d10dca1040 Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Thu, 15 Jan 2026 20:17:13 -0600
Subject: [PATCH 1/2] [AMDGPU][GlobalIsel] Add register bank legalization rules
for fptoi and itofp
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 33 +-
.../AMDGPU/GlobalISel/fp-int-conversions.ll | 1201 +++++++++++++++++
.../GlobalISel/regbankselect-fptosi.mir | 3 +-
.../GlobalISel/regbankselect-fptoui.mir | 3 +-
.../GlobalISel/regbankselect-sitofp.mir | 3 +-
.../GlobalISel/regbankselect-uitofp.mir | 3 +-
6 files changed, 1234 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 82e937cdc4ed3..bc7bb4a994194 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1182,14 +1182,37 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
.Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});
- addRulesForGOpcs({G_FPTOUI})
+ addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
+ .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
+ .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
+ .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}})
+ .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
+ .Any({{UniS16, S64}, {{UniInVgprS16}, {Vgpr64}}})
+ .Any({{DivS16, S64}, {{Vgpr16}, {Vgpr64}}})
+ .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
+ .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
+ .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
- .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
-
- addRulesForGOpcs({G_UITOFP})
+ .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
.Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
+ .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
+ .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
+ .Any({{UniS64, S16}, {{UniInVgprS64}, {Vgpr16}}})
+ .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr16}}});
+
+ addRulesForGOpcs({G_UITOFP, G_SITOFP})
+ .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
+ .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
+ .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
+ .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
+ .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
- .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
+ .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
+ .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
+ .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
+ .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
+ .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
+ .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});
addRulesForGOpcs({G_FPEXT})
.Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
new file mode 100644
index 0000000000000..3134ec956e685
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
@@ -0,0 +1,1201 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX10
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX11
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX12
+
+define amdgpu_ps void @s_fptoui_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f16_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u16_f16_e32 v2, s0
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f16_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u16_f16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f16_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_u16_f16_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui half %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptosi_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f16_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i16_f16_e32 v2, s0
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f16_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i16_f16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f16_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_i16_f16_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi half %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptoui_f16_to_i16(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f16_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u16_f16_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f16_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u16_f16_e32 v0.l, v0.l
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f16_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_u16_f16_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui half %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptosi_f16_to_i16(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f16_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i16_f16_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f16_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i16_f16_e32 v0.l, v0.l
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f16_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_i16_f16_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi half %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptoui_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f32_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f32_e32 v2, s0
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f32_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f32_e32 v2, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_readfirstlane_b32 s0, v2
+; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f32_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_u32_f32 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui float %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptosi_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f32_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f32_e32 v2, s0
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f32_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f32_e32 v2, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_readfirstlane_b32 s0, v2
+; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f32_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_i32_f32 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi float %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptoui_f32_to_i16(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f32_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f32_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f32_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui float %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptosi_f32_to_i16(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f32_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f32_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f32_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi float %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptoui_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f64_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f64_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_readfirstlane_b32 s0, v2
+; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f64_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui double %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptosi_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f64_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f64_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_readfirstlane_b32 s0, v2
+; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f64_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi double %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptoui_f64_to_i16(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f64_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f64_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f64_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX12-NEXT: global_store_b16 v[2:3], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui double %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptosi_f64_to_i16(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f64_to_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: global_store_short v[2:3], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f64_to_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: global_store_b16 v[2:3], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f64_to_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX12-NEXT: global_store_b16 v[2:3], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi double %x to i16
+ store i16 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptoui_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f16_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f16_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f16_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_f16 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT: s_cvt_u32_f32 s0, s0
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui half %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptosi_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f16_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f16_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f16_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_f16 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT: s_cvt_i32_f32 s0, s0
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi half %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptoui_f16_to_i32(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f16_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f16_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f16_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui half %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptosi_f16_to_i32(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f16_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f16_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f16_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi half %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptoui_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f32_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f32_e32 v2, s0
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f32_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f32_e32 v2, s0
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f32_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_u32_f32 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui float %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptosi_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f32_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f32_e32 v2, s0
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f32_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f32_e32 v2, s0
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f32_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_i32_f32 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi float %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptoui_f32_to_i32(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f32_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f32_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f32_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui float %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptosi_f32_to_i32(float %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f32_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f32_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f32_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi float %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptoui_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f64_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f64_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f64_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui double %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptosi_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f64_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f64_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f64_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi double %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptoui_f64_to_i32(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f64_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f64_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f64_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
+; GFX12-NEXT: global_store_b32 v[2:3], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui double %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptosi_f64_to_i32(double %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f64_to_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX10-NEXT: global_store_dword v[2:3], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f64_to_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX11-NEXT: global_store_b32 v[2:3], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f64_to_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
+; GFX12-NEXT: global_store_b32 v[2:3], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi double %x to i32
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptoui_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptoui_f16_to_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT: s_mov_b32 s1, 0
+; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX10-NEXT: v_readfirstlane_b32 s0, v2
+; GFX10-NEXT: v_mov_b32_e32 v3, s1
+; GFX10-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptoui_f16_to_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT: s_mov_b32 s1, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX11-NEXT: v_readfirstlane_b32 s0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptoui_f16_to_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_f16 s0, s0
+; GFX12-NEXT: s_mov_b32 s1, 0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT: s_cvt_u32_f32 s0, s0
+; GFX12-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui half %x to i64
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_fptosi_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_fptosi_f16_to_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX10-NEXT: v_readfirstlane_b32 s0, v2
+; GFX10-NEXT: s_ashr_i32 s1, s0, 31
+; GFX10-NEXT: v_mov_b32_e32 v3, s1
+; GFX10-NEXT: v_mov_b32_e32 v2, s0
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_fptosi_f16_to_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX11-NEXT: v_readfirstlane_b32 s0, v2
+; GFX11-NEXT: s_ashr_i32 s1, s0, 31
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
+; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_fptosi_f16_to_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_f16 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT: s_cvt_i32_f32 s0, s0
+; GFX12-NEXT: s_ashr_i32 s1, s0, 31
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi half %x to i64
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptoui_f16_to_i64(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptoui_f16_to_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0
+; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptoui_f16_to_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v0
+; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptoui_f16_to_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: s_endpgm
+ %result = fptoui half %x to i64
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_fptosi_f16_to_i64(half %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_fptosi_f16_to_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX10-NEXT: v_cvt_i32_f32_e32 v3, v0
+; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_fptosi_f16_to_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_i32_f32_e32 v3, v0
+; GFX11-NEXT: v_ashrrev_i32_e32 v4, 31, v3
+; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_fptosi_f16_to_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: s_endpgm
+ %result = fptosi half %x to i64
+ store i64 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_uitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i16_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f16_u16_e32 v2, s0
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i16_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f16_u16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i16_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: v_cvt_f16_u16_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i16 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_sitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i16_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f16_i16_e32 v2, s0
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i16_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f16_i16_e32 v2.l, s0
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i16_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: v_cvt_f16_i16_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i16 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_uitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i16_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f16_u16_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i16_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f16_u16_e32 v0.l, v0.l
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i16_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: v_cvt_f16_u16_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i16 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_sitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i16_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f16_i16_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i16_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f16_i16_e32 v0.l, v0.l
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i16_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: v_cvt_f16_i16_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i16 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_uitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i32_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i32_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i32_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_u32 s0, s0
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT: s_cvt_f16_f32 s0, s0
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i32 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_sitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i32_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX10-NEXT: global_store_short v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i32_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f16_f32_e32 v2.l, v2
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i32_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_i32 s0, s0
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX12-NEXT: s_cvt_f16_f32 s0, s0
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b16 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i32 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_uitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i32_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i32_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i32_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i32 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_sitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i32_to_f16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-NEXT: global_store_short v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i32_to_f16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-NEXT: global_store_b16 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i32_to_f16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX12-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i32 %x to half
+ store half %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_uitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i32_to_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s0
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i32_to_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s0
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i32_to_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_u32 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i32 %x to float
+ store float %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_sitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i32_to_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s0
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i32_to_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s0
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i32_to_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: s_cvt_f32_i32 s0, s0
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v2, s0
+; GFX12-NEXT: global_store_b32 v[0:1], v2, off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i32 %x to float
+ store float %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_uitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i32_to_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i32_to_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i32_to_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i32 %x to float
+ store float %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_sitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i32_to_f32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX10-NEXT: global_store_dword v[1:2], v0, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i32_to_f32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX11-NEXT: global_store_b32 v[1:2], v0, off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i32_to_f32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[4:5], v0, off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i32 %x to float
+ store float %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_uitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_uitofp_i32_to_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f64_u32_e32 v[2:3], s0
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uitofp_i32_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f64_u32_e32 v[2:3], s0
+; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_uitofp_i32_to_f64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f64_u32_e32 v[2:3], s0
+; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i32 %x to double
+ store double %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @s_sitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: s_sitofp_i32_to_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f64_i32_e32 v[2:3], s0
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_sitofp_i32_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f64_i32_e32 v[2:3], s0
+; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: s_sitofp_i32_to_f64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_cvt_f64_i32_e32 v[2:3], s0
+; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i32 %x to double
+ store double %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_uitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_uitofp_i32_to_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f64_u32_e32 v[3:4], v0
+; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uitofp_i32_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f64_u32_e32 v[3:4], v0
+; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_uitofp_i32_to_f64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: s_endpgm
+ %result = uitofp i32 %x to double
+ store double %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @v_sitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
+; GFX10-LABEL: v_sitofp_i32_to_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_cvt_f64_i32_e32 v[3:4], v0
+; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_sitofp_i32_to_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_cvt_f64_i32_e32 v[3:4], v0
+; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: v_sitofp_i32_to_f64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
+; GFX12-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: s_endpgm
+ %result = sitofp i32 %x to double
+ store double %result, ptr addrspace(1) %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir
index c690f8439098f..f77a686d4e5c3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptosi.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: fptosi_s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir
index 17e656340f780..86cf60d92b6c6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fptoui.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: fptoui_s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir
index 66e0d3db24112..4252e50ac064e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sitofp.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: sitofp_s
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir
index e95be13c47d3b..3e46839532917 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uitofp.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' -o - %s | FileCheck %s
---
name: uitofp_s
@@ -14,6 +14,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UITOFP]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_UITOFP %0
...
>From acb507c15cfa459e3e87d44b77f30de91433791b Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Thu, 29 Jan 2026 13:39:49 -0600
Subject: [PATCH 2/2] [AMDGPU][GlobalIsel] Add register bank legalization
rules for fptoi and itofp -- updated test
---
.../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 10 +-
.../AMDGPU/GlobalISel/fp-int-conversions.ll | 1189 +++--------------
2 files changed, 207 insertions(+), 992 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index bc7bb4a994194..5d653116970f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1185,10 +1185,6 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
.Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
.Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
- .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}})
- .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
- .Any({{UniS16, S64}, {{UniInVgprS16}, {Vgpr64}}})
- .Any({{DivS16, S64}, {{Vgpr16}, {Vgpr64}}})
.Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
.Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
.Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
@@ -1196,9 +1192,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
.Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
.Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
- .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
- .Any({{UniS64, S16}, {{UniInVgprS64}, {Vgpr16}}})
- .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr16}}});
+ .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});
addRulesForGOpcs({G_UITOFP, G_SITOFP})
.Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
@@ -1209,8 +1203,6 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
.Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
- .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
- .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
.Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
.Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
index 3134ec956e685..317778228b396 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-int-conversions.ll
@@ -1,1201 +1,424 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX10
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX11
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1250 -new-reg-bank-select < %s | FileCheck %s --check-prefix=GFX12
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_wait" --filter-out "s_delay_alu" --filter-out "endpgm" --filter-out "store" --version 6
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,FAKE16,PREGFX12,PREGFX12-FAKE16
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,PREGFX12,TRUE16
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,FAKE16,PREGFX12,PREGFX12-FAKE16
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -new-reg-bank-select < %s | FileCheck %s --check-prefixes=GCN,FAKE16,GFX12
define amdgpu_ps void @s_fptoui_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f16_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u16_f16_e32 v2, s0
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
+; FAKE16-LABEL: s_fptoui_f16_to_i16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_u16_f16_e32 v2, s0
;
-; GFX11-LABEL: s_fptoui_f16_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u16_f16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f16_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_u16_f16_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: s_fptoui_f16_to_i16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_u16_f16_e32 v2.l, s0
%result = fptoui half %x to i16
store i16 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_fptosi_f16_to_i16(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f16_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i16_f16_e32 v2, s0
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f16_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i16_f16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; FAKE16-LABEL: s_fptosi_f16_to_i16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_i16_f16_e32 v2, s0
;
-; GFX12-LABEL: s_fptosi_f16_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_i16_f16_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: s_fptosi_f16_to_i16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_i16_f16_e32 v2.l, s0
%result = fptosi half %x to i16
store i16 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptoui_f16_to_i16(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f16_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u16_f16_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
+; FAKE16-LABEL: v_fptoui_f16_to_i16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_u16_f16_e32 v0, v0
;
-; GFX11-LABEL: v_fptoui_f16_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u16_f16_e32 v0.l, v0.l
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f16_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_u16_f16_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: v_fptoui_f16_to_i16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_u16_f16_e32 v0.l, v0.l
%result = fptoui half %x to i16
store i16 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptosi_f16_to_i16(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f16_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i16_f16_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f16_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i16_f16_e32 v0.l, v0.l
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
+; FAKE16-LABEL: v_fptosi_f16_to_i16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_i16_f16_e32 v0, v0
;
-; GFX12-LABEL: v_fptosi_f16_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_i16_f16_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: v_fptosi_f16_to_i16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_i16_f16_e32 v0.l, v0.l
%result = fptosi half %x to i16
store i16 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_ps void @s_fptoui_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f32_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f32_e32 v2, s0
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f32_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f32_e32 v2, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f32_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_u32_f32 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
- %result = fptoui float %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @s_fptosi_f32_to_i16(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f32_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f32_e32 v2, s0
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f32_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f32_e32 v2, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f32_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_i32_f32 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
- %result = fptosi float %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @v_fptoui_f32_to_i16(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f32_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f32_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f32_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
- %result = fptoui float %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @v_fptosi_f32_to_i16(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f32_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f32_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f32_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
- %result = fptosi float %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @s_fptoui_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f64_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f64_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f64_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
- %result = fptoui double %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @s_fptosi_f64_to_i16(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f64_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f64_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f64_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
- %result = fptosi double %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @v_fptoui_f64_to_i16(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f64_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: global_store_short v[2:3], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f64_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: global_store_b16 v[2:3], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f64_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX12-NEXT: global_store_b16 v[2:3], v0, off
-; GFX12-NEXT: s_endpgm
- %result = fptoui double %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @v_fptosi_f64_to_i16(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f64_to_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: global_store_short v[2:3], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f64_to_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: global_store_b16 v[2:3], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f64_to_i16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX12-NEXT: global_store_b16 v[2:3], v0, off
-; GFX12-NEXT: s_endpgm
- %result = fptosi double %x to i16
- store i16 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_ps void @s_fptoui_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f16_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f16_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; PREGFX12-LABEL: s_fptoui_f16_to_i32:
+; PREGFX12: ; %bb.0:
+; PREGFX12: v_cvt_f32_f16_e32 v2, s0
+; PREGFX12: v_cvt_u32_f32_e32 v2, v2
;
; GFX12-LABEL: s_fptoui_f16_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_f16 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT: s_cvt_u32_f32 s0, s0
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_f32_f16 s0, s0
+; GFX12: s_cvt_u32_f32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = fptoui half %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_fptosi_f16_to_i32(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f16_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f16_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; PREGFX12-LABEL: s_fptosi_f16_to_i32:
+; PREGFX12: ; %bb.0:
+; PREGFX12: v_cvt_f32_f16_e32 v2, s0
+; PREGFX12: v_cvt_i32_f32_e32 v2, v2
;
; GFX12-LABEL: s_fptosi_f16_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_f16 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT: s_cvt_i32_f32 s0, s0
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_f32_f16 s0, s0
+; GFX12: s_cvt_i32_f32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = fptosi half %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptoui_f16_to_i32(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f16_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT: global_store_dword v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f16_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT: global_store_b32 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f16_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b32 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; FAKE16-LABEL: v_fptoui_f16_to_i32:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f32_f16_e32 v0, v0
+; FAKE16: v_cvt_u32_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_fptoui_f16_to_i32:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f32_f16_e32 v0, v0.l
+; TRUE16: v_cvt_u32_f32_e32 v0, v0
%result = fptoui half %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptosi_f16_to_i32(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f16_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT: global_store_dword v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f16_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT: global_store_b32 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f16_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b32 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; FAKE16-LABEL: v_fptosi_f16_to_i32:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f32_f16_e32 v0, v0
+; FAKE16: v_cvt_i32_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_fptosi_f16_to_i32:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f32_f16_e32 v0, v0.l
+; TRUE16: v_cvt_i32_f32_e32 v0, v0
%result = fptosi half %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_fptoui_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f32_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f32_e32 v2, s0
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f32_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f32_e32 v2, s0
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; PREGFX12-LABEL: s_fptoui_f32_to_i32:
+; PREGFX12: ; %bb.0:
+; PREGFX12: v_cvt_u32_f32_e32 v2, s0
;
; GFX12-LABEL: s_fptoui_f32_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_u32_f32 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_u32_f32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = fptoui float %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_fptosi_f32_to_i32(float inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f32_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f32_e32 v2, s0
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f32_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f32_e32 v2, s0
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; PREGFX12-LABEL: s_fptosi_f32_to_i32:
+; PREGFX12: ; %bb.0:
+; PREGFX12: v_cvt_i32_f32_e32 v2, s0
;
; GFX12-LABEL: s_fptosi_f32_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_i32_f32 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_i32_f32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = fptosi float %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptoui_f32_to_i32(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f32_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX10-NEXT: global_store_dword v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f32_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX11-NEXT: global_store_b32 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f32_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b32 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_fptoui_f32_to_i32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_u32_f32_e32 v0, v0
%result = fptoui float %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptosi_f32_to_i32(float %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f32_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX10-NEXT: global_store_dword v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f32_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX11-NEXT: global_store_b32 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f32_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b32 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_fptosi_f32_to_i32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_i32_f32_e32 v0, v0
%result = fptosi float %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_fptoui_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f64_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f64_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f64_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_u32_f64_e32 v2, s[0:1]
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: s_fptoui_f64_to_i32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_u32_f64_e32 v2, s[0:1]
%result = fptoui double %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_fptosi_f64_to_i32(double inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f64_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f64_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f64_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_i32_f64_e32 v2, s[0:1]
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: s_fptosi_f64_to_i32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_i32_f64_e32 v2, s[0:1]
%result = fptosi double %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptoui_f64_to_i32(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f64_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f64_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: global_store_b32 v[2:3], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f64_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_u32_f64_e32 v0, v[0:1]
-; GFX12-NEXT: global_store_b32 v[2:3], v0, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_fptoui_f64_to_i32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_u32_f64_e32 v0, v[0:1]
%result = fptoui double %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_fptosi_f64_to_i32(double %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f64_to_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX10-NEXT: global_store_dword v[2:3], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f64_to_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX11-NEXT: global_store_b32 v[2:3], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f64_to_i32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_i32_f64_e32 v0, v[0:1]
-; GFX12-NEXT: global_store_b32 v[2:3], v0, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_fptosi_f64_to_i32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_i32_f64_e32 v0, v[0:1]
%result = fptosi double %x to i32
store i32 %result, ptr addrspace(1) %out
ret void
}
-define amdgpu_ps void @s_fptoui_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptoui_f16_to_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT: s_mov_b32 s1, 0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT: v_readfirstlane_b32 s0, v2
-; GFX10-NEXT: v_mov_b32_e32 v3, s1
-; GFX10-NEXT: v_mov_b32_e32 v2, s0
-; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptoui_f16_to_i64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT: s_mov_b32 s1, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptoui_f16_to_i64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_f16 s0, s0
-; GFX12-NEXT: s_mov_b32 s1, 0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT: s_cvt_u32_f32 s0, s0
-; GFX12-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
-; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT: s_endpgm
- %result = fptoui half %x to i64
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @s_fptosi_f16_to_i64(half inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_fptosi_f16_to_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX10-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX10-NEXT: v_readfirstlane_b32 s0, v2
-; GFX10-NEXT: s_ashr_i32 s1, s0, 31
-; GFX10-NEXT: v_mov_b32_e32 v3, s1
-; GFX10-NEXT: v_mov_b32_e32 v2, s0
-; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_fptosi_f16_to_i64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v2, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_ashr_i32 s1, s0, 31
-; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
-; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_fptosi_f16_to_i64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_f16 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT: s_cvt_i32_f32 s0, s0
-; GFX12-NEXT: s_ashr_i32 s1, s0, 31
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
-; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT: s_endpgm
- %result = fptosi half %x to i64
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @v_fptoui_f16_to_i64(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptoui_f16_to_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT: v_mov_b32_e32 v4, 0
-; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptoui_f16_to_i64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT: v_mov_b32_e32 v4, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v0
-; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptoui_f16_to_i64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_mov_b32_e32 v1, 0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX12-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT: s_endpgm
- %result = fptoui half %x to i64
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_ps void @v_fptosi_f16_to_i64(half %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_fptosi_f16_to_i64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX10-NEXT: v_cvt_i32_f32_e32 v3, v0
-; GFX10-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_fptosi_f16_to_i64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0.l
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_i32_f32_e32 v3, v0
-; GFX11-NEXT: v_ashrrev_i32_e32 v4, 31, v3
-; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_fptosi_f16_to_i64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f32_f16_e32 v0, v0
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX12-NEXT: v_ashrrev_i32_e32 v1, 31, v0
-; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT: s_endpgm
- %result = fptosi half %x to i64
- store i64 %result, ptr addrspace(1) %out
- ret void
-}
-
define amdgpu_ps void @s_uitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i16_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f16_u16_e32 v2, s0
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
+; FAKE16-LABEL: s_uitofp_i16_to_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f16_u16_e32 v2, s0
;
-; GFX11-LABEL: s_uitofp_i16_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f16_u16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_uitofp_i16_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: v_cvt_f16_u16_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: s_uitofp_i16_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f16_u16_e32 v2.l, s0
%result = uitofp i16 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_sitofp_i16_to_f16(i16 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i16_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f16_i16_e32 v2, s0
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_sitofp_i16_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f16_i16_e32 v2.l, s0
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; FAKE16-LABEL: s_sitofp_i16_to_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f16_i16_e32 v2, s0
;
-; GFX12-LABEL: s_sitofp_i16_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: v_cvt_f16_i16_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: s_sitofp_i16_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f16_i16_e32 v2.l, s0
%result = sitofp i16 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_uitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i16_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f16_u16_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
+; FAKE16-LABEL: v_uitofp_i16_to_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f16_u16_e32 v0, v0
;
-; GFX11-LABEL: v_uitofp_i16_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f16_u16_e32 v0.l, v0.l
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i16_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: v_cvt_f16_u16_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: v_uitofp_i16_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f16_u16_e32 v0.l, v0.l
%result = uitofp i16 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_sitofp_i16_to_f16(i16 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i16_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f16_i16_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i16_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f16_i16_e32 v0.l, v0.l
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
+; FAKE16-LABEL: v_sitofp_i16_to_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f16_i16_e32 v0, v0
;
-; GFX12-LABEL: v_sitofp_i16_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: v_cvt_f16_i16_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; TRUE16-LABEL: v_sitofp_i16_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f16_i16_e32 v0.l, v0.l
%result = sitofp i16 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_uitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i32_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s0
-; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
+; PREGFX12-FAKE16-LABEL: s_uitofp_i32_to_f16:
+; PREGFX12-FAKE16: ; %bb.0:
+; PREGFX12-FAKE16: v_cvt_f32_u32_e32 v2, s0
+; PREGFX12-FAKE16: v_cvt_f16_f32_e32 v2, v2
;
-; GFX11-LABEL: s_uitofp_i32_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; TRUE16-LABEL: s_uitofp_i32_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f32_u32_e32 v2, s0
+; TRUE16: v_cvt_f16_f32_e32 v2.l, v2
;
; GFX12-LABEL: s_uitofp_i32_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_u32 s0, s0
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT: s_cvt_f16_f32 s0, s0
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_f32_u32 s0, s0
+; GFX12: s_cvt_f16_f32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = uitofp i32 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_sitofp_i32_to_f16(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i32_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s0
-; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2
-; GFX10-NEXT: global_store_short v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
+; PREGFX12-FAKE16-LABEL: s_sitofp_i32_to_f16:
+; PREGFX12-FAKE16: ; %bb.0:
+; PREGFX12-FAKE16: v_cvt_f32_i32_e32 v2, s0
+; PREGFX12-FAKE16: v_cvt_f16_f32_e32 v2, v2
;
-; GFX11-LABEL: s_sitofp_i32_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_f16_f32_e32 v2.l, v2
-; GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; TRUE16-LABEL: s_sitofp_i32_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f32_i32_e32 v2, s0
+; TRUE16: v_cvt_f16_f32_e32 v2.l, v2
;
; GFX12-LABEL: s_sitofp_i32_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_i32 s0, s0
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3)
-; GFX12-NEXT: s_cvt_f16_f32 s0, s0
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b16 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_f32_i32 s0, s0
+; GFX12: s_cvt_f16_f32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = sitofp i32 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_uitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i32_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_uitofp_i32_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i32_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; FAKE16-LABEL: v_uitofp_i32_to_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f32_u32_e32 v0, v0
+; FAKE16: v_cvt_f16_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_uitofp_i32_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f32_u32_e32 v0, v0
+; TRUE16: v_cvt_f16_f32_e32 v0.l, v0
%result = uitofp i32 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_sitofp_i32_to_f16(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i32_to_f16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX10-NEXT: global_store_short v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i32_to_f16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_cvt_f16_f32_e32 v0.l, v0
-; GFX11-NEXT: global_store_b16 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_sitofp_i32_to_f16:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 2, 2), 0
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX12-NEXT: global_store_b16 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; FAKE16-LABEL: v_sitofp_i32_to_f16:
+; FAKE16: ; %bb.0:
+; FAKE16: v_cvt_f32_i32_e32 v0, v0
+; FAKE16: v_cvt_f16_f32_e32 v0, v0
+;
+; TRUE16-LABEL: v_sitofp_i32_to_f16:
+; TRUE16: ; %bb.0:
+; TRUE16: v_cvt_f32_i32_e32 v0, v0
+; TRUE16: v_cvt_f16_f32_e32 v0.l, v0
%result = sitofp i32 %x to half
store half %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_uitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i32_to_f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s0
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_uitofp_i32_to_f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s0
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; PREGFX12-LABEL: s_uitofp_i32_to_f32:
+; PREGFX12: ; %bb.0:
+; PREGFX12: v_cvt_f32_u32_e32 v2, s0
;
; GFX12-LABEL: s_uitofp_i32_to_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_u32 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_f32_u32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = uitofp i32 %x to float
store float %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_sitofp_i32_to_f32(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i32_to_f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s0
-; GFX10-NEXT: global_store_dword v[0:1], v2, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_sitofp_i32_to_f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s0
-; GFX11-NEXT: global_store_b32 v[0:1], v2, off
-; GFX11-NEXT: s_endpgm
+; PREGFX12-LABEL: s_sitofp_i32_to_f32:
+; PREGFX12: ; %bb.0:
+; PREGFX12: v_cvt_f32_i32_e32 v2, s0
;
; GFX12-LABEL: s_sitofp_i32_to_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: s_cvt_f32_i32 s0, s0
-; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: global_store_b32 v[0:1], v2, off
-; GFX12-NEXT: s_endpgm
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_f32_i32 s0, s0
+; GFX12: v_mov_b32_e32 v2, s0
%result = sitofp i32 %x to float
store float %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_uitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i32_to_f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX10-NEXT: global_store_dword v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_uitofp_i32_to_f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX11-NEXT: global_store_b32 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i32_to_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX12-NEXT: global_store_b32 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_uitofp_i32_to_f32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_f32_u32_e32 v0, v0
%result = uitofp i32 %x to float
store float %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_sitofp_i32_to_f32(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i32_to_f32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX10-NEXT: global_store_dword v[1:2], v0, off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i32_to_f32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX11-NEXT: global_store_b32 v[1:2], v0, off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_sitofp_i32_to_f32:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX12-NEXT: global_store_b32 v[4:5], v0, off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_sitofp_i32_to_f32:
+; GCN: ; %bb.0:
+; GCN: v_cvt_f32_i32_e32 v0, v0
%result = sitofp i32 %x to float
store float %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_uitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_uitofp_i32_to_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f64_u32_e32 v[2:3], s0
-; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_uitofp_i32_to_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f64_u32_e32 v[2:3], s0
-; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_uitofp_i32_to_f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f64_u32_e32 v[2:3], s0
-; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: s_uitofp_i32_to_f64:
+; GCN: ; %bb.0:
+; GCN: v_cvt_f64_u32_e32 v[2:3], s0
%result = uitofp i32 %x to double
store double %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @s_sitofp_i32_to_f64(i32 inreg %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: s_sitofp_i32_to_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f64_i32_e32 v[2:3], s0
-; GFX10-NEXT: global_store_dwordx2 v[0:1], v[2:3], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: s_sitofp_i32_to_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f64_i32_e32 v[2:3], s0
-; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: s_sitofp_i32_to_f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_cvt_f64_i32_e32 v[2:3], s0
-; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: s_sitofp_i32_to_f64:
+; GCN: ; %bb.0:
+; GCN: v_cvt_f64_i32_e32 v[2:3], s0
%result = sitofp i32 %x to double
store double %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_uitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_uitofp_i32_to_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f64_u32_e32 v[3:4], v0
-; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_uitofp_i32_to_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f64_u32_e32 v[3:4], v0
-; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_uitofp_i32_to_f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_f64_u32_e32 v[0:1], v0
-; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_uitofp_i32_to_f64:
+; GCN: ; %bb.0:
+; GCN: v_cvt_f64_u32_e32 v[3:4], v0
%result = uitofp i32 %x to double
store double %result, ptr addrspace(1) %out
ret void
}
define amdgpu_ps void @v_sitofp_i32_to_f64(i32 %x, ptr addrspace(1) %out) {
-; GFX10-LABEL: v_sitofp_i32_to_f64:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: v_cvt_f64_i32_e32 v[3:4], v0
-; GFX10-NEXT: global_store_dwordx2 v[1:2], v[3:4], off
-; GFX10-NEXT: s_endpgm
-;
-; GFX11-LABEL: v_sitofp_i32_to_f64:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: v_cvt_f64_i32_e32 v[3:4], v0
-; GFX11-NEXT: global_store_b64 v[1:2], v[3:4], off
-; GFX11-NEXT: s_endpgm
-;
-; GFX12-LABEL: v_sitofp_i32_to_f64:
-; GFX12: ; %bb.0:
-; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
-; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
-; GFX12-NEXT: v_cvt_f64_i32_e32 v[0:1], v0
-; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
-; GFX12-NEXT: s_endpgm
+; GCN-LABEL: v_sitofp_i32_to_f64:
+; GCN: ; %bb.0:
+; GCN: v_cvt_f64_i32_e32 v[3:4], v0
%result = sitofp i32 %x to double
store double %result, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_ps float @fpext_hif16_to_32(<2 x half> inreg %val) {
+; PREGFX12-LABEL: fpext_hif16_to_32:
+; PREGFX12: ; %bb.0:
+; PREGFX12: s_lshr_b32 s0, s0, 16
+; PREGFX12: v_cvt_f32_f16_e32 v0, s0
+; PREGFX12: ; return to shader part epilog
+;
+; GFX12-LABEL: fpext_hif16_to_32:
+; GFX12: ; %bb.0:
+; GFX12: s_cvt_hi_f32_f16 s0, s0
+; GFX12: v_mov_b32_e32 v0, s0
+; GFX12: ; return to shader part epilog
+ %hielt = extractelement <2 x half> %val, i32 1
+ %res = fpext half %hielt to float
+ ret float %res
+}
More information about the llvm-commits mailing list