[llvm-branch-commits] [llvm] [AMDGPU] Add KnownBits simplification combines to RegBankCombiner (PR #141591)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 27 05:51:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-globalisel
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
This patch adds the `known_bits_simplifications` combines to the AMDGPU RegBankCombiner.
---
Patch is 38.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141591.diff
8 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCombine.td (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll (+30-29)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (+21-40)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+22-41)
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+13-17)
- (modified) llvm/test/CodeGen/AMDGPU/itofp.i128.ll (+5-6)
- (modified) llvm/test/CodeGen/AMDGPU/lround.ll (+9-9)
- (modified) llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll (+2-14)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 96be17c487130..df867aaa204b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -210,5 +210,6 @@ def AMDGPURegBankCombiner : GICombiner<
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
identity_combines, redundant_and, constant_fold_cast_op,
cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
- lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
+ lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract,
+ known_bits_simplifications]> {
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 6baa10bb48621..cc0f45681a3e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1744,63 +1744,64 @@ define i65 @v_lshr_i65_33(i65 %value) {
; GFX6-LABEL: v_lshr_i65_33:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX6-NEXT: v_mov_b32_e32 v3, v1
-; GFX6-NEXT: v_mov_b32_e32 v0, 1
+; GFX6-NEXT: v_mov_b32_e32 v3, 1
+; GFX6-NEXT: v_mov_b32_e32 v4, 0
+; GFX6-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX6-NEXT: v_lshl_b64 v[2:3], v[3:4], 31
+; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
; GFX6-NEXT: v_mov_b32_e32 v1, 0
-; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
-; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
; GFX6-NEXT: v_mov_b32_e32 v2, 0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_lshr_i65_33:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v3, v1
-; GFX8-NEXT: v_mov_b32_e32 v0, 1
+; GFX8-NEXT: v_mov_b32_e32 v3, 1
+; GFX8-NEXT: v_mov_b32_e32 v4, 0
+; GFX8-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX8-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_mov_b32_e32 v1, 0
-; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: v_mov_b32_e32 v2, 0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_lshr_i65_33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v3, v1
-; GFX9-NEXT: v_mov_b32_e32 v0, 1
+; GFX9-NEXT: v_mov_b32_e32 v3, 1
+; GFX9-NEXT: v_mov_b32_e32 v4, 0
+; GFX9-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX9-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
; GFX9-NEXT: v_mov_b32_e32 v1, 0
-; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_lshr_i65_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_mov_b32_e32 v3, v1
-; GFX10-NEXT: v_mov_b32_e32 v0, 1
+; GFX10-NEXT: v_mov_b32_e32 v3, 1
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: v_and_b32_e32 v0, 1, v2
-; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX10-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX10-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX10-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_lshr_i65_33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v0, 1
-; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v2
-; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v3
-; GFX11-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
-; GFX11-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX11-NEXT: v_mov_b32_e32 v3, 1
+; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_and_b32 v3, 1, v2
+; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v1
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: v_lshlrev_b64 v[2:3], 31, v[3:4]
+; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%result = lshr i65 %value, 33
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 7b2872f081979..93629f3bf9548 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_saddsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_saddsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -386,11 +384,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s3, s1, 8
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s1, s4, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s5
; GFX8-NEXT: s_min_i32 s1, s1, s4
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -400,11 +397,10 @@ define amdgpu_ps i16 @s_saddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s2, s3, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s4
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -787,11 +783,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshr_b32 s7, s1, 24
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_sub_i32 s8, 0x8000, s8
+; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_max_i32 s1, s8, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s8, s9
; GFX8-NEXT: s_min_i32 s1, s1, s8
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -801,11 +796,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s8, s5, 0
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_sub_i32 s8, 0x7fff, s8
; GFX8-NEXT: s_max_i32 s2, s5, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s8
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_add_i32 s1, s1, s2
@@ -815,11 +809,10 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s6, s5, 0
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
+; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s3, s5, s3
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s6
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_add_i32 s2, s2, s3
@@ -829,14 +822,13 @@ define amdgpu_ps i32 @s_saddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_min_i32 s5, s5, 0
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
; GFX8-NEXT: s_sub_i32 s5, 0x8000, s5
-; GFX8-NEXT: s_sext_i32_i16 s5, s5
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
-; GFX8-NEXT: s_max_i32 s4, s5, s4
+; GFX8-NEXT: s_sext_i32_i16 s5, s5
+; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s0, s0
; GFX8-NEXT: s_ashr_i32 s1, s1, 8
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
+; GFX8-NEXT: s_max_i32 s4, s5, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s6
; GFX8-NEXT: s_ashr_i32 s0, s0, 8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
@@ -2631,11 +2623,10 @@ define amdgpu_ps i16 @s_saddsat_i16(i16 inreg %lhs, i16 inreg %rhs) {
; GFX8-NEXT: s_max_i32 s3, s2, 0
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sub_i32 s3, 0x7fff, s3
; GFX8-NEXT: s_max_i32 s1, s2, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s3
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_add_i32 s0, s0, s1
@@ -2835,11 +2826,10 @@ define amdgpu_ps i32 @s_saddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s1
-; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s3, s3, s5
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3190,11 +3180,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_max_i32 s7, s6, 0
; GFX8-NEXT: s_min_i32 s6, s6, 0
; GFX8-NEXT: s_sub_i32 s6, 0x8000, s6
+; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s8, s2
-; GFX8-NEXT: s_sub_i32 s7, 0x7fff, s7
; GFX8-NEXT: s_max_i32 s6, s6, s8
-; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s7, s7
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_min_i32 s6, s6, s7
@@ -3215,11 +3204,10 @@ define amdgpu_ps <2 x i32> @s_saddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
; GFX8-NEXT: s_max_i32 s6, s2, 0
; GFX8-NEXT: s_min_i32 s2, s2, 0
; GFX8-NEXT: s_sub_i32 s2, 0x8000, s2
+; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s7, s3
-; GFX8-NEXT: s_sub_i32 s6, 0x7fff, s6
; GFX8-NEXT: s_max_i32 s2, s2, s7
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_min_i32 s2, s2, s6
@@ -3513,11 +3501,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s10, s9, 0
; GFX8-NEXT: s_min_i32 s9, s9, 0
; GFX8-NEXT: s_sub_i32 s9, 0x8000, s9
+; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s11, s3
-; GFX8-NEXT: s_sub_i32 s10, 0x7fff, s10
; GFX8-NEXT: s_max_i32 s9, s9, s11
-; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s10, s10
; GFX8-NEXT: s_lshr_b32 s6, s0, 16
; GFX8-NEXT: s_min_i32 s9, s9, s10
@@ -3538,11 +3525,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s9, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s10, s4
-; GFX8-NEXT: s_sub_i32 s9, 0x7fff, s9
; GFX8-NEXT: s_max_i32 s3, s3, s10
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_min_i32 s3, s3, s9
@@ -3563,11 +3549,10 @@ define amdgpu_ps <3 x i32> @s_saddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
; GFX8-NEXT: s_max_i32 s4, s3, 0
; GFX8-NEXT: s_min_i32 s3, s3, 0
; GFX8-NEXT: s_sub_i32 s3, 0x8000, s3
+; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s9, s5
-; GFX8-NEXT: s_sub_i32 s4, 0x7fff, s4
; GFX8-NEXT: s_max_i32 s3, s3, s9
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_lshr_b32 s8, s2, 16
; GFX8-NEXT: s_min_i32 s3, s3, s4
@@ -3924,11 +3909,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s13, s12, 0
; GFX8-NEXT: s_min_i32 s12, s12, 0
; GFX8-NEXT: s_sub_i32 s12, 0x8000, s12
+; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s14, s4
-; GFX8-NEXT: s_sub_i32 s13, 0x7fff, s13
; GFX8-NEXT: s_max_i32 s12, s12, s14
-; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_sext_i32_i16 s13, s13
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
; GFX8-NEXT: s_min_i32 s12, s12, s13
@@ -3949,11 +3933,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s12, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s13, s5
-; GFX8-NEXT: s_sub_i32 s12, 0x7fff, s12
; GFX8-NEXT: s_max_i32 s4, s4, s13
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s12
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
; GFX8-NEXT: s_min_i32 s4, s4, s12
@@ -3974,11 +3957,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s5, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s12, s6
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s4, s4, s12
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
@@ -3999,11 +3981,10 @@ define amdgpu_ps <4 x i32> @s_saddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
; GFX8-NEXT: s_max_i32 s5, s4, 0
; GFX8-NEXT: s_min_i32 s4, s4, 0
; GFX8-NEXT: s_sub_i32 s4, 0x8000, s4
+; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s6, s7
-; GFX8-NEXT: s_sub_i32 s5, 0x7fff, s5
; GFX8-NEXT: s_max_i32 s4, s4, s6
-; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
; GFX8-NEXT: s_min_i32 s4, s4, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index a60995ecde3a8..558c11ec9c300 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -80,11 +80,10 @@ define amdgpu_ps i7 @s_ssubsat_i7(i7 inreg %lhs, i7 inreg %rhs) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 9
; GFX8-NEXT: s_add_i32 s3, s3, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, -1
+; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -189,11 +188,10 @@ define amdgpu_ps i8 @s_ssubsat_i8(i8 inreg %lhs, i8 inreg %rhs) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_add_i32 s3, s3, 0x8001
; GFX8-NEXT: s_min_i32 s2, s2, -1
+; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s2, s2, 0x8000
; GFX8-NEXT: s_max_i32 s1, s3, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_min_i32 s1, s1, s2
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -387,11 +385,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_add_i32 s5, s5, 0x8001
; GFX8-NEXT: s_min_i32 s4, s4, -1
+; GFX8-NEXT: s_add_i32 s4, s4, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s4, s4, 0x8000
; GFX8-NEXT: s_max_i32 s1, s5, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_min_i32 s1, s1, s4
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -401,11 +398,10 @@ define amdgpu_ps i16 @s_ssubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s4, s3, -1
; GFX8-NEXT: s_add_i32 s4, s4, 0x8001
; GFX8-NEXT: s_min_i32 s3, s3, -1
+; GFX8-NEXT: s_add_i32 s3, s3, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s4, s4
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_add_i32 s3, s3, 0x8000
; GFX8-NEXT: s_max_i32 s2, s4, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_min_i32 s2, s2, s3
; GFX8-NEXT: s_sub_i32 s1, s1, s2
@@ -788,11 +784,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s1, s1, 8
; GFX8-NEXT: s_add_i32 s9, s9, 0x8001
; GFX8-NEXT: s_min_i32 s8, s8, -1
+; GFX8-NEXT: s_add_i32 s8, s8, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s9, s9
; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_add_i32 s8, s8, 0x8000
; GFX8-NEXT: s_max_i32 s1, s9, s1
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_min_i32 s1, s1, s8
; GFX8-NEXT: s_sub_i32 s0, s0, s1
@@ -802,11 +797,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s8, s5, -1
; GFX8-NEXT: s_add_i32 s8, s8, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, -1
+; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s8, s8
; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_max_i32 s2, s8, s2
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_min_i32 s2, s2, s5
; GFX8-NEXT: s_sub_i32 s1, s1, s2
@@ -816,11 +810,10 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_max_i32 s6, s5, -1
; GFX8-NEXT: s_add_i32 s6, s6, 0x8001
; GFX8-NEXT: s_min_i32 s5, s5, -1
+; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_sext_i32_i16 s6, s6
; GFX8-NEXT: s_sext_i32_i16 s3, s3
-; GFX8-NEXT: s_add_i32 s5, s5, 0x8000
; GFX8-NEXT: s_max_i32 s3, s6, s3
-; GFX8-NEXT: s_sext_i32_i16 s3, s3
; GFX8-NEXT: s_sext_i32_i16 s5, s5
; GFX8-NEXT: s_min_i32 s3, s3, s5
; GFX8-NEXT: s_sub_i32 s2, s2, s3
@@ -830,14 +823,13 @@ define amdgpu_ps i32 @s_ssubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
; GFX8-NEXT: s_lshl_b32 s4, s7, 8
; GFX8-NEXT: s_add_i32 s6, s6, 0x8001
; GFX8-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/141591
More information about the llvm-branch-commits mailing list.