[llvm] [AMDGPU] Add identity_combines to RegBankCombiner (PR #131305)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 14 04:19:27 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
---
Patch is 128.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131305.diff
21 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+2-2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCombine.td (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll (-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll (+60-61)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll (+1-5)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+30-31)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll (-9)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll (+90-91)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+54-55)
- (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+5-5)
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+183-199)
- (modified) llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll (+1-3)
- (modified) llvm/test/CodeGen/AMDGPU/fptoi.i128.ll (+172-184)
- (modified) llvm/test/CodeGen/AMDGPU/fptrunc.ll (-4)
- (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+30-14)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0dfbb91f2ac54..bdb0d01e74f00 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2567,7 +2567,7 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI,
SrcReg = OriginalSrcReg;
LLT DstTy = MRI.getType(DstReg);
return mi_match(SrcReg, MRI,
- m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) && canReplaceReg(DstReg, Reg, MRI);
}
bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
@@ -2577,7 +2577,7 @@ bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
if (mi_match(SrcReg, MRI,
- m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
+ m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) && canReplaceReg(DstReg, Reg, MRI)) {
unsigned DstSize = DstTy.getScalarSizeInBits();
unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index da47aaf8a3b5c..36653867fbba0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -180,5 +180,5 @@ def AMDGPURegBankCombiner : GICombiner<
[unmerge_merge, unmerge_cst, unmerge_undef,
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
- redundant_and]> {
+ identity_combines, redundant_and]> {
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index ff5880819020d..38374d1689366 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -640,7 +640,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
; GFX7-NEXT: s_cmp_lt_i32 s1, 0
; GFX7-NEXT: s_cselect_b32 s1, 1, 0
; GFX7-NEXT: s_xor_b32 s0, s1, s0
-; GFX7-NEXT: s_and_b32 s0, s0, 1
; GFX7-NEXT: s_add_i32 s0, s2, s0
; GFX7-NEXT: ; return to shader part epilog
;
@@ -652,7 +651,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
; GFX8-NEXT: s_cmp_lt_i32 s1, 0
; GFX8-NEXT: s_cselect_b32 s1, 1, 0
; GFX8-NEXT: s_xor_b32 s0, s1, s0
-; GFX8-NEXT: s_and_b32 s0, s0, 1
; GFX8-NEXT: s_add_i32 s0, s2, s0
; GFX8-NEXT: ; return to shader part epilog
;
@@ -664,7 +662,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
; GFX9-NEXT: s_cmp_lt_i32 s1, 0
; GFX9-NEXT: s_cselect_b32 s1, 1, 0
; GFX9-NEXT: s_xor_b32 s0, s1, s0
-; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_add_i32 s0, s2, s0
; GFX9-NEXT: ; return to shader part epilog
%saddo = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
@@ -749,8 +746,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
; GFX7-NEXT: s_cselect_b32 s3, 1, 0
; GFX7-NEXT: s_xor_b32 s0, s2, s0
; GFX7-NEXT: s_xor_b32 s1, s3, s1
-; GFX7-NEXT: s_and_b32 s0, s0, 1
-; GFX7-NEXT: s_and_b32 s1, s1, 1
; GFX7-NEXT: s_add_i32 s0, s4, s0
; GFX7-NEXT: s_add_i32 s1, s5, s1
; GFX7-NEXT: ; return to shader part epilog
@@ -769,8 +764,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
; GFX8-NEXT: s_cselect_b32 s3, 1, 0
; GFX8-NEXT: s_xor_b32 s0, s2, s0
; GFX8-NEXT: s_xor_b32 s1, s3, s1
-; GFX8-NEXT: s_and_b32 s0, s0, 1
-; GFX8-NEXT: s_and_b32 s1, s1, 1
; GFX8-NEXT: s_add_i32 s0, s4, s0
; GFX8-NEXT: s_add_i32 s1, s5, s1
; GFX8-NEXT: ; return to shader part epilog
@@ -789,8 +782,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
; GFX9-NEXT: s_cselect_b32 s3, 1, 0
; GFX9-NEXT: s_xor_b32 s0, s2, s0
; GFX9-NEXT: s_xor_b32 s1, s3, s1
-; GFX9-NEXT: s_and_b32 s0, s0, 1
-; GFX9-NEXT: s_and_b32 s1, s1, 1
; GFX9-NEXT: s_add_i32 s0, s4, s0
; GFX9-NEXT: s_add_i32 s1, s5, s1
; GFX9-NEXT: ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index ee89b28a0d2bb..2c44d719d0b45 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -106,7 +106,6 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
; GCN-NEXT: s_mov_b32 s2, 0
; GCN-NEXT: .LBB4_2: ; %Flow
; GCN-NEXT: s_xor_b32 s2, s2, 1
-; GCN-NEXT: s_and_b32 s2, s2, 1
; GCN-NEXT: s_cmp_lg_u32 s2, 0
; GCN-NEXT: s_cbranch_scc1 .LBB4_4
; GCN-NEXT: ; %bb.3: ; %.zero
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
index a354c072aa150..c295a662704e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -36,7 +36,6 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: .LBB0_2: ; %Flow
; GFX9-NEXT: s_xor_b32 s0, s0, 1
-; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc1 .LBB0_4
; GFX9-NEXT: ; %bb.3: ; %bb0
@@ -121,7 +120,6 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: .LBB1_2: ; %Flow
; GFX9-NEXT: s_xor_b32 s0, s0, 1
-; GFX9-NEXT: s_and_b32 s0, s0, 1
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
; GFX9-NEXT: s_cbranch_scc1 .LBB1_4
; GFX9-NEXT: ; %bb.3: ; %bb0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index 755eb13a61e14..5240bf4f3a1d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -356,7 +356,6 @@ define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %
; OLD_RBS-NEXT: s_cmp_ge_u32 s1, 20
; OLD_RBS-NEXT: s_cselect_b32 s3, 1, 0
; OLD_RBS-NEXT: s_and_b32 s2, s2, s3
-; OLD_RBS-NEXT: s_and_b32 s2, s2, 1
; OLD_RBS-NEXT: s_cmp_lg_u32 s2, 0
; OLD_RBS-NEXT: s_cselect_b32 s0, s0, s1
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 4bfd29430ff1e..694a81a9668f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -1077,7 +1077,6 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX8-NEXT: s_xor_b32 s0, s1, s0
; GFX8-NEXT: s_ashr_i32 s1, s3, 23
; GFX8-NEXT: s_add_i32 s1, s1, 0xff800000
-; GFX8-NEXT: s_and_b32 s0, s0, 1
; GFX8-NEXT: s_cmp_lg_u32 s0, 0
; GFX8-NEXT: s_cselect_b32 s0, s1, s2
; GFX8-NEXT: ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 2a200259a93d2..4031fe0be2823 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -171,17 +171,17 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
-; CHECK-NEXT: v_mul_lo_u32 v1, v0, v2
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
-; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_mul_lo_u32 v3, v0, v2
+; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v0
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v0
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = sdiv i64 %num, %den
@@ -335,7 +335,6 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s0, s0, 1
-; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
; CHECK-NEXT: ; %bb.4:
@@ -809,17 +808,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
-; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v4
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
+; CGP-NEXT: v_mul_lo_u32 v2, v0, v4
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v4
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: .LBB2_4:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
@@ -981,17 +980,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
-; CGP-NEXT: v_mul_lo_u32 v3, v2, v6
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v3
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v6
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v3, 0
+; CGP-NEXT: v_mul_lo_u32 v4, v2, v6
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v4, v6
+; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_setpc_b64 s[30:31]
%result = sdiv <2 x i64> %num, %den
@@ -1817,17 +1816,17 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0
-; CHECK-NEXT: v_mul_lo_u32 v1, v0, v5
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_mul_lo_u32 v2, v0, v5
+; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v0
+; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%shl.y = shl i64 4096, %y
@@ -2279,17 +2278,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
-; CGP-NEXT: v_mul_lo_u32 v1, v0, v11
-; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v11
-; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: v_mov_b32_e32 v1, 0
+; CGP-NEXT: v_mul_lo_u32 v2, v0, v11
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v11
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v11
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v11
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CGP-NEXT: .LBB8_4:
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: v_or_b32_e32 v3, v7, v10
@@ -2453,17 +2452,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
-; CGP-NEXT: v_mul_lo_u32 v3, v2, v9
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v9
-; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
-; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
-; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
; CGP-NEXT: v_mov_b32_e32 v3, 0
+; CGP-NEXT: v_mul_lo_u32 v4, v2, v9
+; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
+; CGP-NEXT: v_sub_i32_e32 v4, vcc, v5, v4
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
+; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v4, v9
+; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
+; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9
+; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
; CGP-NEXT: s_setpc_b64 s[30:31]
%shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
index bac80f0777c02..8300e2542d452 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
@@ -1444,7 +1444,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 10, v1
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
-; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 0
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 10
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 10, v2
@@ -1459,7 +1458,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 10, v1
; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
-; GFX8-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 10
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 10, v2
@@ -1473,7 +1471,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1
; GFX9-NEXT: v_or_b32_e32 v2, v2, v3
-; GFX9-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX9-NEXT: v_bfe_u32 v1, v1, 0, 10
@@ -1486,9 +1483,8 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1
-; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
-; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3
; GFX10PLUS-NEXT: v_bfe_u32 v1, v1, 0, 10
+; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3
; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 1
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2
; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v2, 10, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 2bb42308d935c..1a10f5fb7a5ce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -167,15 +167,15 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: v_mul_lo_u32 v0, v0, v2
; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_setpc_b64 s[30:31]
%result = srem i64 %num, %den
@@ -327,7 +327,6 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
; CHECK-NEXT: .LBB1_3: ; %Flow
; CHECK-NEXT: s_xor_b32 s0, s7, 1
-; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
; CHECK-NEXT: ; %bb.4:
@@ -791,15 +790,15 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
+; CGP-NEXT: v_mov_b32_e32 v1, 0
; CGP-NEXT: v_mul_lo_u32 v0, v0, v4
; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v4
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v4
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; CGP-NEXT: v_mov_b32_e32 v1, 0
+; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; CGP-NEXT: .LBB2_4:
; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
@@ -959,15 +958,15 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
+; CGP-NEXT: v_mov_b32_e32 v3...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/131305
More information about the llvm-commits mailing list