[llvm] AMDGPU/GlobalISel: Regbanklegalize rules for G_INTRINSIC_FPTRUNC_ROUND (PR #185491)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 9 11:52:15 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: vangthao95

<details>
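<summary>Changes</summary>

Add RegBankLegalize rules for G_INTRINSIC_FPTRUNC_ROUND covering the S32-to-S16, S64-to-S16, and S64-to-S32 cases for both uniform and divergent results, and run the GlobalISel tests with `-new-reg-bank-select`.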

---
Full diff: https://github.com/llvm/llvm-project/pull/185491.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+8) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll (+104-50) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 73592d52ad04c..0872334ce7a74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -1312,6 +1312,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
       .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
       .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
 
+  addRulesForGOpcs({G_INTRINSIC_FPTRUNC_ROUND})
+      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}})
+      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
+      .Any({{UniS16, S64}, {{UniInVgprS16}, {Vgpr64}}})
+      .Any({{DivS16, S64}, {{Vgpr16}, {Vgpr64}}})
+      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
+      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});
+
   addRulesForGOpcs({G_IS_FPCLASS})
       .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
       .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
index d2caadff14b85..985e9927481ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.err.ll
@@ -1,17 +1,17 @@
 ; RUN: split-file %s %t
 
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
+; RUN: not llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F64-FAIL %s
 
 ; TODO: check for GISEL when bfloat is supported.
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f32-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F32-FAIL %s
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/bf16-f64-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=BF16-F64-FAIL %s
 
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
+; RUN: not llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f16-f32-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F16-F32-TONEARESTAWAY-FAIL %s
 
 ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
+; RUN: not llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 -filetype=null %t/f32-f64-tonearestaway-err.ll 2>&1 | FileCheck --ignore-case --check-prefix=F32-F64-TONEARESTAWAY-FAIL %s
 
 ;--- f16-f64-err.ll
 define amdgpu_gs void @test_fptrunc_round_f16_f64(double %a, ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index 3d9ce6e79d9d2..9554daeda07b3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
 
 define amdgpu_gs half @v_fptrunc_round_f32_to_f16_tonearest(float %a) {
 ; CHECK-LABEL: v_fptrunc_round_f32_to_f16_tonearest:
@@ -109,14 +109,23 @@ define amdgpu_gs void @v_fptrunc_round_f32_to_f16_towardzero_multiple_calls(floa
 }
 
 define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_f32_to_f16_upward:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
-; CHECK-NEXT:    ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_f32_to_f16_upward:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_f32_to_f16_upward:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; GISEL-NEXT:    s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT:    ; return to shader part epilog
   %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.upward")
   %bitcast = bitcast half %res to i16
   %ret = zext i16 %bitcast to i32
@@ -124,14 +133,23 @@ define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_upward(float inreg %a, ptr addr
 }
 
 define amdgpu_gs i32 @s_fptrunc_round_f32_to_f16_downward(float inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_f32_to_f16_downward:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
-; CHECK-NEXT:    ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_f32_to_f16_downward:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_f32_to_f16_downward:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; GISEL-NEXT:    s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT:    ; return to shader part epilog
   %res = call half @llvm.fptrunc.round.f16.f32(float %a, metadata !"round.downward")
   %bitcast = bitcast half %res to i16
   %ret = zext i16 %bitcast to i32
@@ -250,18 +268,31 @@ define amdgpu_gs void @v_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
 }
 
 define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; CHECK-NEXT:    v_mov_b32_e32 v1, s1
-; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
-; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
-; CHECK-NEXT:    ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_v2f32_to_v2f16_upward:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; GISEL-NEXT:    s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT:    s_and_b32 s1, 0xffff, s1
+; GISEL-NEXT:    ; return to shader part epilog
   %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
   %bitcast = bitcast <2 x half> %res to <2 x i16>
   %ret = zext <2 x i16> %bitcast to <2 x i32>
@@ -269,18 +300,31 @@ define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_upward(<2 x float> in
 }
 
 define amdgpu_gs <2 x i32> @s_fptrunc_round_v2f32_to_v2f16_downward(<2 x float> inreg %a, ptr addrspace(1) %out) {
-; CHECK-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    v_mov_b32_e32 v0, s0
-; CHECK-NEXT:    v_mov_b32_e32 v1, s1
-; CHECK-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
-; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; CHECK-NEXT:    v_and_b32_e32 v1, 0xffff, v1
-; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
-; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
-; CHECK-NEXT:    ; return to shader part epilog
+; SDAG-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    v_mov_b32_e32 v0, s0
+; SDAG-NEXT:    v_mov_b32_e32 v1, s1
+; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; SDAG-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; SDAG-NEXT:    v_readfirstlane_b32 s1, v1
+; SDAG-NEXT:    ; return to shader part epilog
+;
+; GISEL-LABEL: s_fptrunc_round_v2f32_to_v2f16_downward:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
+; GISEL-NEXT:    v_readfirstlane_b32 s1, v1
+; GISEL-NEXT:    s_and_b32 s0, 0xffff, s0
+; GISEL-NEXT:    s_and_b32 s1, 0xffff, s1
+; GISEL-NEXT:    ; return to shader part epilog
   %res = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.downward")
   %bitcast = bitcast <2 x half> %res to <2 x i16>
   %ret = zext <2 x i16> %bitcast to <2 x i32>
@@ -325,15 +369,21 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v6, v4
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v7, v5
-; GISEL-NEXT:    v_pack_b32_f16 v2, v2, v3
+; GISEL-NEXT:    v_readfirstlane_b32 s0, v2
+; GISEL-NEXT:    v_readfirstlane_b32 s1, v3
+; GISEL-NEXT:    v_readfirstlane_b32 s2, v6
+; GISEL-NEXT:    v_readfirstlane_b32 s3, v7
 ; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 2
-; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v4
-; GISEL-NEXT:    v_cvt_f16_f32_e32 v4, v5
-; GISEL-NEXT:    v_pack_b32_f16 v5, v6, v7
-; GISEL-NEXT:    v_pack_b32_f16 v3, v3, v4
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v4
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v3, v5
+; GISEL-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
+; GISEL-NEXT:    s_pack_ll_b32_b16 s1, s2, s3
+; GISEL-NEXT:    v_readfirstlane_b32 s2, v2
+; GISEL-NEXT:    v_readfirstlane_b32 s3, v3
 ; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 0
-; GISEL-NEXT:    v_pk_add_f16 v2, v2, v5
-; GISEL-NEXT:    v_pk_add_f16 v2, v3, v2
+; GISEL-NEXT:    v_pk_add_f16 v2, s0, s1
+; GISEL-NEXT:    s_pack_ll_b32_b16 s0, s2, s3
+; GISEL-NEXT:    v_pk_add_f16 v2, s0, v2
 ; GISEL-NEXT:    global_store_dword v[0:1], v2, off
 ; GISEL-NEXT:    s_endpgm
   %res1 = call <2 x half> @llvm.fptrunc.round.v2f16.v2f32(<2 x float> %a, metadata !"round.upward")
@@ -358,10 +408,12 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %
 ; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT:    v_lshl_or_b32 v1, s0, 16, v2
 ; GISEL-NEXT:    ; return to shader part epilog
   %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
   ret <3 x half> %res
@@ -380,10 +432,12 @@ define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float>
 ; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
+; GISEL-NEXT:    v_cvt_f16_f32_e32 v2, v2
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GISEL-NEXT:    v_pack_b32_f16 v0, v0, v1
-; GISEL-NEXT:    v_cvt_f16_f32_e32 v1, v2
+; GISEL-NEXT:    v_lshl_or_b32 v1, s0, 16, v2
 ; GISEL-NEXT:    ; return to shader part epilog
   %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
   ret <3 x half> %res

``````````

</details>


https://github.com/llvm/llvm-project/pull/185491


More information about the llvm-commits mailing list