[llvm] Add freeze for LowerSELECT (PR #148796)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 15 00:28:46 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: None (Shoreshen)
Changes:
Trying to solve https://github.com/llvm/llvm-project/issues/147635.
Add a freeze in the legalizer when breaking an i64 select into two i32 selects.
Several tests changed; the reason still needs to be investigated.
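For context, a minimal sketch (not taken from the PR or the linked issue; the function name and values are hypothetical) of the pattern this change is aimed at: once the 64-bit select is split, both 32-bit selects reuse the same setcc condition, and if the compared value is poison or undef the two copies of the condition could in principle be folded differently, letting the low and high halves come from different arms. Freezing the setcc operand pins both uses of the condition to a single value.

```llvm
; Hypothetical input, not from the PR: a 64-bit select whose condition
; comes from a compare of a 32-bit value.
define i64 @split_select(i32 %x, i64 %a, i64 %b) {
  %c = icmp slt i32 %x, 0
  %r = select i1 %c, i64 %a, i64 %b
  ret i64 %r
}

; Rough shape after LowerSELECT with this patch (DAG pseudo-IR, not real
; syntax):
;   %x.fr = freeze %x                          ; inserted by this patch
;   %c    = setcc slt %x.fr, 0
;   %lo   = select %c, lo(%a), lo(%b)
;   %hi   = select %c, hi(%a), hi(%b)
; With the freeze, both i32 selects observe the same condition value even
; when %x started out poison or undef.
```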
---
Patch is 109.76 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148796.diff
14 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+8-2)
- (modified) llvm/test/CodeGen/AMDGPU/div_v2i128.ll (+300-301)
- (modified) llvm/test/CodeGen/AMDGPU/fmaximum3.ll (+18-12)
- (modified) llvm/test/CodeGen/AMDGPU/fminimum3.ll (+18-12)
- (modified) llvm/test/CodeGen/AMDGPU/fnearbyint.ll (+14-7)
- (modified) llvm/test/CodeGen/AMDGPU/fract-match.ll (+31-26)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.frexp.ll (+61-32)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll (+205-173)
- (modified) llvm/test/CodeGen/AMDGPU/lround.ll (+29-22)
- (modified) llvm/test/CodeGen/AMDGPU/roundeven.ll (+22-15)
- (modified) llvm/test/CodeGen/AMDGPU/select-undef.ll (+20)
- (modified) llvm/test/CodeGen/AMDGPU/srem.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/srem64.ll (+44-44)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6cf2055c8e565..15b0d547bee62 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11071,11 +11071,17 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
VT.getSizeInBits() == 512)
return splitTernaryVectorOp(Op, DAG);
- assert(VT.getSizeInBits() == 64);
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
-
+ if (Cond.getOpcode() == ISD::SETCC) {
+ SDValue Freeze = DAG.getFreeze(Cond.getOperand(0));
+ if (Freeze != Cond.getOperand(0)) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ Cond =
+ DAG.getSetCC(DL, Cond.getValueType(), Freeze, Cond.getOperand(1), CC);
+ }
+ }
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..43128db05a597 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -6,77 +6,77 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-LABEL: v_sdiv_v2i128_vv:
; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 0, v0
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
+; SDAG-NEXT: v_sub_i32_e32 v18, vcc, 0, v0
+; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_ashrrev_i32_e32 v24, 31, v3
; SDAG-NEXT: v_ashrrev_i32_e32 v25, 31, v11
-; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f
-; SDAG-NEXT: v_subb_u32_e32 v17, vcc, 0, v1, vcc
+; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f
+; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v1, vcc
; SDAG-NEXT: v_mov_b32_e32 v26, v24
; SDAG-NEXT: v_mov_b32_e32 v27, v25
-; SDAG-NEXT: v_subb_u32_e32 v19, vcc, 0, v2, vcc
-; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
-; SDAG-NEXT: v_cndmask_b32_e64 v21, v1, v17, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v20, v0, v16, s[4:5]
-; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v3, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v16, v2, v19, s[4:5]
-; SDAG-NEXT: v_ffbh_u32_e32 v1, v20
-; SDAG-NEXT: v_ffbh_u32_e32 v2, v21
-; SDAG-NEXT: v_cndmask_b32_e64 v17, v3, v0, s[4:5]
-; SDAG-NEXT: v_or_b32_e32 v0, v20, v16
-; SDAG-NEXT: v_sub_i32_e32 v3, vcc, 0, v8
-; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], 32, v1
-; SDAG-NEXT: v_ffbh_u32_e32 v22, v16
+; SDAG-NEXT: v_subb_u32_e32 v16, vcc, 0, v2, vcc
+; SDAG-NEXT: v_subb_u32_e32 v17, vcc, 0, v3, vcc
+; SDAG-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[2:3]
+; SDAG-NEXT: v_cndmask_b32_e32 v17, v3, v17, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v16, v2, v16, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v21, v1, v20, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v20, v0, v18, vcc
+; SDAG-NEXT: v_sub_i32_e32 v2, vcc, 0, v8
; SDAG-NEXT: v_or_b32_e32 v1, v21, v17
-; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v9, vcc
-; SDAG-NEXT: v_min_u32_e32 v2, v19, v2
-; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], 32, v22
+; SDAG-NEXT: v_or_b32_e32 v0, v20, v16
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, 0, v9, vcc
+; SDAG-NEXT: v_ffbh_u32_e32 v18, v16
; SDAG-NEXT: v_ffbh_u32_e32 v22, v17
+; SDAG-NEXT: v_ffbh_u32_e32 v23, v20
+; SDAG-NEXT: v_ffbh_u32_e32 v28, v21
; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
-; SDAG-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
-; SDAG-NEXT: v_cndmask_b32_e64 v28, v9, v23, s[6:7]
; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v10, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v29, v8, v3, s[6:7]
-; SDAG-NEXT: v_min_u32_e32 v1, v19, v22
-; SDAG-NEXT: v_add_i32_e64 v2, s[8:9], 64, v2
-; SDAG-NEXT: v_addc_u32_e64 v3, s[8:9], 0, 0, s[8:9]
-; SDAG-NEXT: v_subb_u32_e32 v8, vcc, 0, v11, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[6:7]
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17]
-; SDAG-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v10, v2, v1, vcc
-; SDAG-NEXT: v_ffbh_u32_e32 v3, v29
-; SDAG-NEXT: v_ffbh_u32_e32 v19, v28
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v11, v8, s[6:7]
-; SDAG-NEXT: v_or_b32_e32 v2, v29, v0
-; SDAG-NEXT: v_add_i32_e32 v8, vcc, 32, v3
-; SDAG-NEXT: v_ffbh_u32_e32 v11, v0
+; SDAG-NEXT: v_add_i32_e64 v1, s[6:7], 32, v18
+; SDAG-NEXT: v_add_i32_e64 v18, s[6:7], 32, v23
+; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v11, vcc
+; SDAG-NEXT: v_min_u32_e32 v22, v1, v22
+; SDAG-NEXT: v_min_u32_e32 v18, v18, v28
+; SDAG-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[10:11]
+; SDAG-NEXT: v_cndmask_b32_e32 v1, v11, v23, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v28, v9, v3, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v29, v8, v2, vcc
+; SDAG-NEXT: v_add_i32_e32 v8, vcc, 64, v18
+; SDAG-NEXT: v_addc_u32_e64 v9, s[6:7], 0, 0, vcc
; SDAG-NEXT: v_or_b32_e32 v3, v28, v1
-; SDAG-NEXT: v_min_u32_e32 v8, v8, v19
-; SDAG-NEXT: v_add_i32_e32 v11, vcc, 32, v11
-; SDAG-NEXT: v_ffbh_u32_e32 v19, v1
+; SDAG-NEXT: v_or_b32_e32 v2, v29, v0
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17]
+; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v8, v8, v22, vcc
+; SDAG-NEXT: v_ffbh_u32_e32 v10, v0
+; SDAG-NEXT: v_ffbh_u32_e32 v11, v1
+; SDAG-NEXT: v_ffbh_u32_e32 v18, v29
+; SDAG-NEXT: v_ffbh_u32_e32 v22, v28
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
-; SDAG-NEXT: v_min_u32_e32 v2, v11, v19
-; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 64, v8
-; SDAG-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
-; SDAG-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[0:1]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[6:7]
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[6:7]
+; SDAG-NEXT: v_add_i32_e64 v2, s[6:7], 32, v10
+; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 32, v18
+; SDAG-NEXT: v_min_u32_e32 v2, v2, v11
+; SDAG-NEXT: v_min_u32_e32 v3, v3, v22
; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
-; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
-; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v9, vcc
+; SDAG-NEXT: v_add_i32_e32 v3, vcc, 64, v3
+; SDAG-NEXT: v_addc_u32_e64 v10, s[4:5], 0, 0, vcc
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT: v_cndmask_b32_e64 v10, v10, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
+; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v10, v9, vcc
; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v2
-; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v18, vcc
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[2:3]
-; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
-; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v18, vcc
+; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v19, vcc
+; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[2:3]
+; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
+; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v19, vcc
; SDAG-NEXT: v_or_b32_e32 v8, v8, v10
; SDAG-NEXT: v_or_b32_e32 v9, v3, v11
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
-; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
+; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v18, v19, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v8, v19, v18, s[4:5]
; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8
; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
@@ -1564,67 +1564,67 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 0, v0
; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_ashrrev_i32_e32 v28, 31, v3
-; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f
+; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f
; SDAG-NEXT: v_subb_u32_e32 v17, vcc, 0, v1, vcc
; SDAG-NEXT: v_mov_b32_e32 v29, v28
; SDAG-NEXT: v_subb_u32_e32 v18, vcc, 0, v2, vcc
-; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
-; SDAG-NEXT: v_cndmask_b32_e64 v17, v1, v17, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v16, v0, v16, s[4:5]
-; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v3, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v0, v2, v18, s[4:5]
-; SDAG-NEXT: v_ffbh_u32_e32 v18, v16
-; SDAG-NEXT: v_ffbh_u32_e32 v20, v17
+; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v3, vcc
; SDAG-NEXT: v_sub_i32_e32 v21, vcc, 0, v8
-; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
-; SDAG-NEXT: v_or_b32_e32 v2, v16, v0
-; SDAG-NEXT: v_add_i32_e64 v18, s[4:5], 32, v18
-; SDAG-NEXT: v_ffbh_u32_e32 v22, v0
-; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v9, vcc
-; SDAG-NEXT: v_or_b32_e32 v3, v17, v1
-; SDAG-NEXT: v_min_u32_e32 v18, v18, v20
-; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], 32, v22
-; SDAG-NEXT: v_ffbh_u32_e32 v22, v1
-; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[10:11]
-; SDAG-NEXT: v_cndmask_b32_e64 v30, v9, v23, s[4:5]
-; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v10, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v31, v8, v21, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
-; SDAG-NEXT: v_min_u32_e32 v3, v20, v22
-; SDAG-NEXT: v_add_i32_e64 v8, s[8:9], 64, v18
-; SDAG-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[8:9]
+; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
+; SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v20, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v17, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v16, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5]
+; SDAG-NEXT: v_subb_u32_e32 v18, vcc, 0, v9, vcc
+; SDAG-NEXT: v_or_b32_e32 v17, v1, v3
+; SDAG-NEXT: v_or_b32_e32 v16, v0, v2
+; SDAG-NEXT: v_ffbh_u32_e32 v20, v2
+; SDAG-NEXT: v_ffbh_u32_e32 v22, v3
+; SDAG-NEXT: v_ffbh_u32_e32 v23, v0
+; SDAG-NEXT: v_ffbh_u32_e32 v24, v1
+; SDAG-NEXT: v_subb_u32_e32 v25, vcc, 0, v10, vcc
+; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[16:17]
+; SDAG-NEXT: v_add_i32_e64 v16, s[6:7], 32, v20
+; SDAG-NEXT: v_add_i32_e64 v17, s[6:7], 32, v23
; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v11, vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v2, v10, v9, s[4:5]
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; SDAG-NEXT: v_min_u32_e32 v16, v16, v22
+; SDAG-NEXT: v_min_u32_e32 v17, v17, v24
+; SDAG-NEXT: v_cmp_gt_i64_e32 vcc, 0, v[10:11]
+; SDAG-NEXT: v_cndmask_b32_e32 v11, v11, v20, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v10, v10, v25, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v30, v9, v18, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v31, v8, v21, vcc
+; SDAG-NEXT: v_add_i32_e32 v17, vcc, 64, v17
+; SDAG-NEXT: v_addc_u32_e64 v18, s[6:7], 0, 0, vcc
+; SDAG-NEXT: v_or_b32_e32 v9, v30, v11
+; SDAG-NEXT: v_or_b32_e32 v8, v31, v10
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, 0, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v10, v8, v3, vcc
-; SDAG-NEXT: v_ffbh_u32_e32 v9, v31
-; SDAG-NEXT: v_ffbh_u32_e32 v21, v30
-; SDAG-NEXT: v_cndmask_b32_e64 v3, v11, v20, s[4:5]
-; SDAG-NEXT: v_or_b32_e32 v8, v31, v2
-; SDAG-NEXT: v_add_i32_e32 v11, vcc, 32, v9
-; SDAG-NEXT: v_ffbh_u32_e32 v20, v2
-; SDAG-NEXT: v_or_b32_e32 v9, v30, v3
-; SDAG-NEXT: v_min_u32_e32 v11, v11, v21
-; SDAG-NEXT: v_add_i32_e32 v20, vcc, 32, v20
-; SDAG-NEXT: v_ffbh_u32_e32 v21, v3
+; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
+; SDAG-NEXT: v_ffbh_u32_e32 v17, v10
+; SDAG-NEXT: v_ffbh_u32_e32 v20, v11
+; SDAG-NEXT: v_ffbh_u32_e32 v21, v31
+; SDAG-NEXT: v_ffbh_u32_e32 v22, v30
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
-; SDAG-NEXT: v_min_u32_e32 v8, v20, v21
-; SDAG-NEXT: v_add_i32_e64 v9, s[4:5], 64, v11
-; SDAG-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
-; SDAG-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[2:3]
-; SDAG-NEXT: v_cndmask_b32_e64 v11, v11, 0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5]
-; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
-; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v8, v10
-; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v11, v18, vcc
-; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v10
+; SDAG-NEXT: v_add_i32_e64 v8, s[6:7], 32, v17
+; SDAG-NEXT: v_add_i32_e64 v9, s[6:7], 32, v21
+; SDAG-NEXT: v_min_u32_e32 v8, v8, v20
+; SDAG-NEXT: v_min_u32_e32 v9, v9, v22
+; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
+; SDAG-NEXT: v_add_i32_e32 v9, vcc, 64, v9
+; SDAG-NEXT: v_addc_u32_e64 v17, s[4:5], 0, 0, vcc
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; SDAG-NEXT: v_cndmask_b32_e64 v17, v17, 0, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
+; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v8, v16
+; SDAG-NEXT: v_subb_u32_e32 v17, vcc, v17, v18, vcc
+; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v16
; SDAG-NEXT: v_subb_u32_e32 v18, vcc, 0, v19, vcc
-; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
+; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[16:17]
; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
; SDAG-NEXT: v_subb_u32_e32 v19, vcc, 0, v19, vcc
; SDAG-NEXT: v_or_b32_e32 v8, v8, v18
-; SDAG-NEXT: v_or_b32_e32 v9, v11, v19
+; SDAG-NEXT: v_or_b32_e32 v9, v17, v19
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
@@ -1633,72 +1633,72 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8
; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v34, v1, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v34, v3, 0, s[4:5]
; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
-; SDAG-NEXT: v_cndmask_b32_e64 v32, v0, 0, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v27, v17, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v32, v2, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v27, v1, 0, s[4:5]
; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc
-; SDAG-NEXT: v_cndmask_b32_e64 v33, v16, 0, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v33, v0, 0, s[4:5]
; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
; SDAG-NEXT: s_cbranch_execz .LBB2_6
; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
-; SDAG-NEXT: v_add_i32_e32 v32, vcc, 1, v10
-; SDAG-NEXT: v_sub_i32_e64 v20, s[4:5], 63, v10
+; SDAG-NEXT: v_add_i32_e32 v32, vcc, 1, v16
+; SDAG-NEXT: v_sub_i32_e64 v20, s[4:5], 63, v16
; SDAG-NEXT: v_mov_b32_e32 v8, 0
; SDAG-NEXT: v_mov_b32_e32 v9, 0
-; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc
-; SDAG-NEXT: v_lshl_b64 v[20:21], v[16:17], v20
+; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v17, vcc
+; SDAG-NEXT: v_lshl_b64 v[20:21], v[0:1], v20
; SDAG-NEXT: v_addc_u32_e32 v34, vcc, 0, v18, vcc
; SDAG-NEXT: v_addc_u32_e32 v35, vcc, 0, v19, vcc
-; SDAG-NEXT: v_or_b32_e32 v18, v32, v34
-; SDAG-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v10
-; SDAG-NEXT: v_or_b32_e32 v19, v33, v35
-; SDAG-NEXT: v_lshl_b64 v[10:11], v[0:1], v24
-; SDAG-NEXT: v_sub_i32_e32 v25, vcc, 64, v24
-; SDAG-NEXT: v_lshl_b64 v[22:23], v[16:17], v24
-; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
-; SDAG-NEXT: v_lshr_b64 v[18:19], v[16:17], v25
-; SDAG-NEXT: v_or_b32_e32 v11, v11, v19
-; SDAG-NEXT: v_or_b32_e32 v10, v10, v18
-; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24
-; SDAG-NEXT: v_cndmask_b32_e64 v11, v21, v11, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v10, v20, v10, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v23, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v22, s[4:5]
-; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v24
-; SDAG-NEXT: v_cndmask_b32_e64 v11, v11, v1, s[4:5]
-; SDAG-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5]
+; SDAG-NEXT: v_or_b32_e32 v17, v32, v34
+; SDAG-NEXT: v_sub_i32_e32 v19, vcc, 0x7f, v16
+; SDAG-NEXT: v_or_b32_e32 v18, v33, v35
+; SDAG-NEXT: v_lshl_b64 v[22:23], v[2:3], v19
+; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 64, v19
+; SDAG-NEXT: v_lshl_b64 v[24:25], v[0:1], v19
+; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[17:18]
+; SDAG-NEXT: v_lshr_b64 v[16:17], v[0:1], v16
+; SDAG-NEXT: v_or_b32_e32 v17, v23, v17
+; SDAG-NEXT: v_or_b32_e32 v16, v22, v16
+; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v19
+; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, v17, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v16, v20, v16, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v25, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v24, s[4:5]
+; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19
+; SDAG-NEXT: v_cndmask_b32_e64 v17, v17, v3, s[4:5]
+; SDAG-NEXT: v_cndmask_b32_e64 v16, v16, v2, s[4:5]
; SDAG-NEXT: v_mov_b32_e32 v18, 0
; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; SDAG-NEXT: s_cbranch_execz .LBB2_5
; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
-; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v32
+; SDAG-NEXT: v_lshr_b64 v[8:9], v[0:1], v32
; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 64, v32
; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
-; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
+; SDAG-NEXT: v_lshr_b64 v[24:25], v[2:3], v32
; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
; SDAG-NEXT: v_mov_b32_e32 v18, 0
; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
-; SDAG-NEXT: v_lshl_b64 v[26:27], v[0:1], v26
-; SDAG-NEXT: v_lshr_b64 v[48:49], v[0:1], v37
+; SDAG-NEXT: v_lshl_b64 v[26:27], v[2:3], v26
+; SDAG-NEXT: v_lshr_b64 v[48:49], v[2:3], v37
; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v30, vcc
; SDAG-NEXT: v_or_b32_e32 v9, v9, v27
; SDAG-NEXT: v_or_b32_e32 v8, v8, v26
-; SDAG-NEXT: v_addc_u32_e32 v38, vcc, -1, v2, vcc
+; SDAG-NEXT: v_addc_u32_e32 v38, vcc, -1, v10, vcc
; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v32
; SDAG-NEXT: v_cndmask_b32_e64 v9, v49, v9, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v8, v48, v8, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v27, 0, v25, s[4:5]
; SDAG-NEXT: v_cndmask_b32_e64 v26, 0, v24, s[4:5]
-; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v3, vcc
+; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v11, vcc
; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v32
-; SDAG-NEXT: v_cndmask_b32_e32 v25, v9, v17, vcc
-; SDAG-NEXT: v_cndmask_b32_e32 v24, v8, v16, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v25, v9, v1, vcc
+; SDAG-NEXT: v_cndmask_b32_e32 v24, v8, v0, vcc
; SDAG-NEXT: v_mov_b32_e32 v9, 0
; SDAG-NEXT: .LBB2_3: ; %udiv-do-while3
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1707,13 +1707,13 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
; SDAG-NEXT: v_lshrrev_b32_e32 v48, 31, v25
; SDAG-NEXT: v_lshl_b64 v[24:25], v[24:25], 1
-; SDAG-NEXT: v_lshrrev_b32_e32 v49, 31, v11
-; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
+; SDAG-NEXT: v_lshrrev_b32_e32 v49, 31, v17
+; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
; SDAG-NEXT: v_or_b32_e32 v21, v23, v21
; SDAG-NEXT: v_or_b32_e32 v20, v22, v20
; SDAG-NEXT: v_or_b32_e32 v22, v26, v48
; SDAG-NEXT: v_or_b32_e32 v23, v24, v49
-; SDAG-NEXT: v_or_b32_e32 v10, v10, v8
+; SDAG-NEXT: v_or_b32_e32 v16, v16, v8
; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v36, v23
; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v25, vcc
; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v38, v22, vcc
@@ -1721,8 +1721,8 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8
; SDAG-NEXT: v_and_b32_e32 v24, v8, v31
; SDAG-NEXT: v_and_b32_e32 v26, v8, v30
-; SDAG-NEXT: v_and_b32_e32 v48, v8, v2
-; SDAG-NEXT: v_and_b32_e32 v49, v8, v3
+; SDAG-NEXT: v_and_b32_e32 v48, v8, v10
+; SDAG-NEXT: v_and_b32_e32 v49, v8, v11
; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
; SDAG-NEXT: v_sub_i32_e32 v24, vcc, v23, v24
; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v25, v26, vcc
@@ -1735,9 +1735,9 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_or_b32_e32 v22, v32, v34
; SDAG-NEXT: v_or_b32_e32 v23, v33, v35
; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[22:23]
-; SDAG-NEXT: v_or_b32_e32 v11, v19, v11
+; SDAG-NEX...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/148796