[llvm] AMDGPU/GlobalISel: Regbanklegalize rules for G_INTRINSIC_FPTRUNC_ROUND (PR #185491)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 11:52:15 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: vangthao95
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/185491.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+8)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll (+104-50)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 73592d52ad04c..0872334ce7a74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1312,6 +1312,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
.Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
+ addRulesForGOpcs({G_INTRINSIC_FPTRUNC_ROUND})
+ .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}})
+ .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
+ .Any({{UniS16, S64}, {{UniInVgprS16}, {Vgpr64}}})
+ .Any({{DivS16, S64}, {{Vgpr16}, {Vgpr64}}})
+ .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
+ .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});
+
addRulesForGOpcs({G_IS_FPCLASS})
.Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
.Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
index d2caadff14b85..985e9927481ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
@@ -1,17 +1,17 @@
; RUN: split-file %s %t
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
+; RUN: not llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
; TODO: check for GISEL when bfloat is supported.
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+; RUN: not llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
+; RUN: not llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
;--- f16-f64-err.ll
define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 3d9ce6e79d9d2..9554daeda07b3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
define amdgpu_gs half @v_fptrunc_round_f32_to_f16_tonearest(float %a) {
; CHECK-LABEL: v_fptrunc_round_f32_to_f16_tonearest:
@@ -109,14 +109,23 @@ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_towardzero_multiple_calls(floa
}
define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_f32_to_f16_upward:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-NEXT: s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
%bitcast = bitcast half %res to i16
%ret = zext i16 %bitcast to i32
@@ -124,14 +133,23 @@ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addr
}
define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_f32_to_f16_downward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_f32_to_f16_downward:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-NEXT: s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT: ; return to shader part epilog
%res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
%bitcast = bitcast half %res to i16
%ret = zext i16 %bitcast to i32
@@ -250,18 +268,31 @@ define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
}
define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: v_mov_b32_e32 v1, s1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: v_readfirstlane_b32 s1, v1
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GISEL-NEXT: s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT: s_and_b32 s1, 0xffff, s1
+; GISEL-NEXT: ; return to shader part epilog
%res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
%bitcast = bitcast <2 x half> %res to <2 x i16>
%ret = zext <2 x i16> %bitcast to <2 x i32>
@@ -269,18 +300,31 @@ define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> in
}
define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_downward(<2 x float> inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: v_mov_b32_e32 v0, s0
-; CHECK-NEXT: v_mov_b32_e32 v1, s1
-; CHECK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
-; CHECK-NEXT: v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT: v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; CHECK-NEXT: v_readfirstlane_b32 s0, v0
-; CHECK-NEXT: v_readfirstlane_b32 s1, v1
-; CHECK-NEXT: ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: v_mov_b32_e32 v0, s0
+; SDAG-NEXT: v_mov_b32_e32 v1, s1
+; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; SDAG-NEXT: v_readfirstlane_b32 s0, v0
+; SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SDAG-NEXT: ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: v_mov_b32_e32 v0, s0
+; GISEL-NEXT: v_mov_b32_e32 v1, s1
+; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT: v_readfirstlane_b32 s0, v0
+; GISEL-NEXT: v_readfirstlane_b32 s1, v1
+; GISEL-NEXT: s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT: s_and_b32 s1, 0xffff, s1
+; GISEL-NEXT: ; return to shader part epilog
%res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.downward")
%bitcast = bitcast <2 x half> %res to <2 x i16>
%ret = zext <2 x i16> %bitcast to <2 x i32>
@@ -325,15 +369,21 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
; GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
; GISEL-NEXT: v_cvt_f16_f32_e32 v6, v4
; GISEL-NEXT: v_cvt_f16_f32_e32 v7, v5
-; GISEL-NEXT: v_pack_b32_f16 v2, v2, v3
+; GISEL-NEXT: v_readfirstlane_b32 s0, v2
+; GISEL-NEXT: v_readfirstlane_b32 s1, v3
+; GISEL-NEXT: v_readfirstlane_b32 s2, v6
+; GISEL-NEXT: v_readfirstlane_b32 s3, v7
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
-; GISEL-NEXT: v_cvt_f16_f32_e32 v3, v4
-; GISEL-NEXT: v_cvt_f16_f32_e32 v4, v5
-; GISEL-NEXT: v_pack_b32_f16 v5, v6, v7
-; GISEL-NEXT: v_pack_b32_f16 v3, v3, v4
+; GISEL-NEXT: v_cvt_f16_f32_e32 v2, v4
+; GISEL-NEXT: v_cvt_f16_f32_e32 v3, v5
+; GISEL-NEXT: s_pack_ll_b32_b16 s0, s0, s1
+; GISEL-NEXT: s_pack_ll_b32_b16 s1, s2, s3
+; GISEL-NEXT: v_readfirstlane_b32 s2, v2
+; GISEL-NEXT: v_readfirstlane_b32 s3, v3
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
-; GISEL-NEXT: v_pk_add_f16 v2, v2, v5
-; GISEL-NEXT: v_pk_add_f16 v2, v3, v2
+; GISEL-NEXT: v_pk_add_f16 v2, s0, s1
+; GISEL-NEXT: s_pack_ll_b32_b16 s0, s2, s3
+; GISEL-NEXT: v_pk_add_f16 v2, s0, v2
; GISEL-NEXT: global_store_dword v[0:1], v2, off
; GISEL-NEXT: s_endpgm
%res1 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
@@ -358,10 +408,12 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
-; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v2
; GISEL-NEXT: ; return to shader part epilog
%res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
ret <3 x half> %res
@@ -380,10 +432,12 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float>
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
-; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT: v_lshl_or_b32 v1, s0, 16, v2
; GISEL-NEXT: ; return to shader part epilog
%res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
ret <3 x half> %res
``````````
</details>
https://github.com/llvm/llvm-project/pull/185491
More information about the llvm-commits mailing list