[llvm] AMDGPU/GlobalISel: Run redundant_and combine in RegBankCombiner (PR #112353)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 15 05:58:44 PDT 2024
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112353
From 0bd879b9173b648d57b2a9401976598246be916d Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Tue, 15 Oct 2024 14:47:29 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Run redundant_and combine in
RegBankCombiner
This combine is needed to clear the redundant ANDs with 1 that
reg-bank-select will create to clean up the high bits of a register.
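For illustration only, a minimal standalone C++ sketch (hypothetical
helper, not the LLVM KnownBits API) of the reasoning the redundant_and
combine relies on: an AND with a mask is a no-op when every bit the mask
would clear is already known to be zero.

#include <cassert>
#include <cstdint>

// Hypothetical helper, not LLVM API: "X & Mask" changes nothing when all
// bits that Mask would clear are already known zero in X.
static bool isRedundantAnd(uint32_t KnownZeroOfX, uint32_t Mask) {
  return (~Mask & ~KnownZeroOfX) == 0;
}

int main() {
  // A boolean from v_cndmask_b32 ..., 0, 1 has bits [31:1] known zero,
  // so a following AND with 1 is redundant (see the test diffs below).
  assert(isRedundantAnd(/*KnownZeroOfX=*/~1u, /*Mask=*/1u));
  // With no known-zero bits the AND must stay.
  assert(!isRedundantAnd(/*KnownZeroOfX=*/0u, /*Mask=*/1u));
}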
Also fix replaceRegWith from CombinerHelper: if a copy has to be
inserted, create the copy first and only then delete MI. If MI is
deleted first, the insertion point is no longer valid.
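As a hedged illustration of why the order matters (a standalone C++
analogy, not LLVM code): an insertion point that refers to MI must be
used before MI is erased, exactly as with container iterators.

#include <cassert>
#include <iterator>
#include <list>

int main() {
  std::list<int> Block = {1, 2, 3};
  auto MI = std::next(Block.begin()); // stand-in for the combined MI
  Block.insert(MI, 42);               // "buildCopy" while MI still exists
  Block.erase(MI);                    // only now delete MI
  assert((Block == std::list<int>{1, 42, 3}));
  // Erasing first would leave MI dangling and make the insert undefined.
}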
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 12 +-
llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 3 +-
.../GlobalISel/artifact-combiner-asserts.ll | 4 +-
llvm/test/CodeGen/AMDGPU/fptoi.i128.ll | 12 +-
llvm/test/CodeGen/AMDGPU/itofp.i128.ll | 166 +++++++++---------
5 files changed, 97 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 14e94d48bf8362..f9b1621955c217 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -178,7 +178,7 @@ void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
if (MRI.constrainRegAttrs(ToReg, FromReg))
MRI.replaceRegWith(FromReg, ToReg);
else
- Builder.buildCopy(ToReg, FromReg);
+ Builder.buildCopy(FromReg, ToReg);
Observer.finishedChangingAllUsesOfReg();
}
@@ -229,8 +229,8 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- MI.eraseFromParent();
replaceRegWith(MRI, DstReg, SrcReg);
+ MI.eraseFromParent();
}
bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
@@ -379,8 +379,8 @@ void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
Builder.buildUndef(NewDstReg);
else
Builder.buildBuildVector(NewDstReg, Ops);
- MI.eraseFromParent();
replaceRegWith(MRI, DstReg, NewDstReg);
+ MI.eraseFromParent();
}
bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
@@ -559,8 +559,8 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
else
Builder.buildMergeLikeInstr(NewDstReg, Ops);
- MI.eraseFromParent();
replaceRegWith(MRI, DstReg, NewDstReg);
+ MI.eraseFromParent();
}
bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
@@ -2825,8 +2825,8 @@ void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
Register OldReg = MI.getOperand(0).getReg();
Register Replacement = MI.getOperand(OpIdx).getReg();
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
- MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
+ MI.eraseFromParent();
}
void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
@@ -2834,8 +2834,8 @@ void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
- MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
+ MI.eraseFromParent();
}
bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b2a3f9392157d1..985fa8f1deff94 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -169,5 +169,6 @@ def AMDGPURegBankCombiner : GICombiner<
"AMDGPURegBankCombinerImpl",
[unmerge_merge, unmerge_cst, unmerge_undef,
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
- fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
+ fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
+ redundant_and]> {
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
index 6dce6c1852af9b..6e4fb2678b382d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
@@ -27,10 +27,8 @@ define hidden <2 x i64> @icmp_v2i32_zext_to_v2i64(<2 x i32> %arg) {
; CHECK-NEXT: v_mov_b32_e32 v3, 0
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; CHECK-NEXT: v_and_b32_e32 v2, 1, v1
; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; CHECK-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %arg, zeroinitializer
%sext = zext <2 x i1> %cmp to <2 x i64>
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 6e8e6c07217895..786fe03164690e 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -136,12 +136,12 @@ define i128 @fptosi_f64_to_i128(double %x) {
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v5, v1
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_lshrrev_b32_e32 v0, 20, v5
-; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v0
+; GISEL-NEXT: v_lshrrev_b32_e32 v2, 20, v5
; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff
; GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: v_mov_b32_e32 v7, 0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v2
; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -508,12 +508,12 @@ define i128 @fptoui_f64_to_i128(double %x) {
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v5, v1
; GISEL-NEXT: v_mov_b32_e32 v4, v0
-; GISEL-NEXT: v_lshrrev_b32_e32 v0, 20, v5
-; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v0
+; GISEL-NEXT: v_lshrrev_b32_e32 v2, 20, v5
; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff
; GISEL-NEXT: s_mov_b64 s[4:5], 0
-; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: v_mov_b32_e32 v7, 0
+; GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v2
; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 38d928a006fb20..2999ddb8315883 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -673,38 +673,38 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
; GISEL-NEXT: v_xor_b32_e32 v0, v6, v4
; GISEL-NEXT: v_xor_b32_e32 v1, v6, v5
-; GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v6
-; GISEL-NEXT: v_xor_b32_e32 v2, v6, v2
-; GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT: v_xor_b32_e32 v3, v6, v3
-; GISEL-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v6, vcc
-; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
-; GISEL-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
-; GISEL-NEXT: v_ffbh_u32_e32 v4, v1
-; GISEL-NEXT: v_add_u32_e32 v5, 32, v5
-; GISEL-NEXT: v_ffbh_u32_e32 v7, v2
-; GISEL-NEXT: v_min_u32_e32 v4, v4, v5
-; GISEL-NEXT: v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT: v_xor_b32_e32 v4, v6, v2
+; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
+; GISEL-NEXT: v_xor_b32_e32 v5, v6, v3
+; GISEL-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v6, vcc
+; GISEL-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v6, vcc
+; GISEL-NEXT: v_ffbh_u32_e32 v1, v2
+; GISEL-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
+; GISEL-NEXT: v_ffbh_u32_e32 v0, v3
+; GISEL-NEXT: v_add_u32_e32 v1, 32, v1
+; GISEL-NEXT: v_ffbh_u32_e32 v7, v4
+; GISEL-NEXT: v_min_u32_e32 v0, v0, v1
+; GISEL-NEXT: v_ffbh_u32_e32 v1, v5
; GISEL-NEXT: v_add_u32_e32 v7, 32, v7
-; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT: v_add_u32_e32 v4, 64, v4
-; GISEL-NEXT: v_min_u32_e32 v5, v5, v7
-; GISEL-NEXT: v_cndmask_b32_e32 v9, v5, v4, vcc
+; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; GISEL-NEXT: v_add_u32_e32 v0, 64, v0
+; GISEL-NEXT: v_min_u32_e32 v1, v1, v7
+; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v0, vcc
; GISEL-NEXT: v_sub_u32_e32 v8, 0x80, v9
; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v9
; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 53, v8
; GISEL-NEXT: ; implicit-def: $vgpr10
-; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffb5, v9
-; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT: v_add_u32_e32 v4, 0xffffffb5, v9
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], v4, v[2:3]
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc
; GISEL-NEXT: ; implicit-def: $vgpr8
-; GISEL-NEXT: ; implicit-def: $vgpr0
+; GISEL-NEXT: ; implicit-def: $vgpr2
; GISEL-NEXT: ; implicit-def: $vgpr9
; GISEL-NEXT: ; %bb.3: ; %Flow3
; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
@@ -721,89 +721,88 @@ define double @sitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
; GISEL-NEXT: v_sub_u32_e32 v14, 0x49, v9
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v14
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1]
-; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v14, v[2:3]
+; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[4:5]
; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v14
-; GISEL-NEXT: v_or_b32_e32 v10, v4, v10
-; GISEL-NEXT: v_or_b32_e32 v11, v5, v11
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], v15, v[2:3]
-; GISEL-NEXT: v_lshrrev_b64 v[12:13], v14, v[2:3]
+; GISEL-NEXT: v_lshrrev_b64 v[12:13], v14, v[4:5]
+; GISEL-NEXT: v_or_b32_e32 v10, v0, v10
+; GISEL-NEXT: v_or_b32_e32 v11, v1, v11
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v15, v[4:5]
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT: v_add_u32_e32 v9, 55, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
-; GISEL-NEXT: v_add_u32_e32 v14, 55, v9
-; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
-; GISEL-NEXT: v_sub_u32_e32 v11, 64, v14
-; GISEL-NEXT: v_cndmask_b32_e64 v13, v4, v0, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v12, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[9:10], v14, -1
-; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v14
-; GISEL-NEXT: v_or_b32_e32 v16, v9, v11
-; GISEL-NEXT: v_or_b32_e32 v17, v10, v12
-; GISEL-NEXT: v_lshrrev_b64 v[11:12], v15, -1
-; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
-; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
-; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
-; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
-; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
-; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
-; GISEL-NEXT: v_and_or_b32 v0, v11, v0, v2
-; GISEL-NEXT: v_and_or_b32 v1, v12, v1, v3
+; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc
+; GISEL-NEXT: v_sub_u32_e32 v12, 64, v9
+; GISEL-NEXT: v_cndmask_b32_e64 v14, v0, v2, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v10, v1, v3, s[4:5]
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], v9, -1
+; GISEL-NEXT: v_lshlrev_b64 v[12:13], v12, -1
+; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v9
+; GISEL-NEXT: v_or_b32_e32 v16, v0, v12
+; GISEL-NEXT: v_or_b32_e32 v17, v1, v13
+; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
+; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
+; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT: v_cndmask_b32_e64 v9, v12, -1, s[4:5]
+; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT: v_and_b32_e32 v0, v0, v4
+; GISEL-NEXT: v_and_b32_e32 v1, v1, v5
+; GISEL-NEXT: v_and_or_b32 v0, v9, v2, v0
+; GISEL-NEXT: v_and_or_b32 v1, v12, v3, v1
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT: v_or_b32_e32 v3, v13, v0
-; GISEL-NEXT: v_mov_b32_e32 v0, v3
-; GISEL-NEXT: v_mov_b32_e32 v1, v4
-; GISEL-NEXT: v_mov_b32_e32 v2, v5
-; GISEL-NEXT: v_mov_b32_e32 v3, v6
+; GISEL-NEXT: v_or_b32_e32 v9, v14, v0
+; GISEL-NEXT: v_mov_b32_e32 v2, v9
+; GISEL-NEXT: v_mov_b32_e32 v3, v10
+; GISEL-NEXT: v_mov_b32_e32 v4, v11
+; GISEL-NEXT: v_mov_b32_e32 v5, v12
; GISEL-NEXT: .LBB2_7: ; %Flow1
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
; GISEL-NEXT: .LBB2_8: ; %Flow2
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
; GISEL-NEXT: s_cbranch_execz .LBB2_10
; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
-; GISEL-NEXT: v_lshlrev_b64 v[9:10], 1, v[0:1]
-; GISEL-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
-; GISEL-NEXT: v_lshrrev_b32_e32 v0, 31, v1
-; GISEL-NEXT: v_or_b32_e32 v11, v2, v0
-; GISEL-NEXT: v_mov_b32_e32 v0, v9
-; GISEL-NEXT: v_mov_b32_e32 v1, v10
-; GISEL-NEXT: v_mov_b32_e32 v2, v11
-; GISEL-NEXT: v_mov_b32_e32 v3, v12
+; GISEL-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
+; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[2:3]
+; GISEL-NEXT: v_lshrrev_b32_e32 v2, 31, v3
+; GISEL-NEXT: v_or_b32_e32 v2, v4, v2
+; GISEL-NEXT: v_mov_b32_e32 v5, v3
+; GISEL-NEXT: v_mov_b32_e32 v4, v2
+; GISEL-NEXT: v_mov_b32_e32 v3, v1
+; GISEL-NEXT: v_mov_b32_e32 v2, v0
; GISEL-NEXT: .LBB2_10: ; %itofp-sw-epilog
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT: v_bfe_u32 v3, v0, 2, 1
-; GISEL-NEXT: v_or_b32_e32 v0, v0, v3
-; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
-; GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GISEL-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT: v_bfe_u32 v0, v2, 2, 1
+; GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v0
+; GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], 2, v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v9, 0
-; GISEL-NEXT: v_and_b32_e32 v10, 0x800000, v1
+; GISEL-NEXT: v_and_b32_e32 v10, 0x800000, v3
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[9:10]
-; GISEL-NEXT: v_lshl_or_b32 v10, v2, 30, v5
+; GISEL-NEXT: v_lshl_or_b32 v10, v4, 30, v1
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT: v_lshrrev_b64 v[0:1], 3, v[2:3]
; GISEL-NEXT: v_mov_b32_e32 v7, v8
-; GISEL-NEXT: v_lshl_or_b32 v10, v2, 29, v5
+; GISEL-NEXT: v_lshl_or_b32 v10, v4, 29, v1
; GISEL-NEXT: ; %bb.12: ; %Flow
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GISEL-NEXT: .LBB2_13: ; %Flow4
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
-; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
-; GISEL-NEXT: v_mov_b32_e32 v1, 0x3ff00000
-; GISEL-NEXT: v_mov_b32_e32 v2, 0xfffff
-; GISEL-NEXT: v_lshl_add_u32 v1, v7, 20, v1
-; GISEL-NEXT: v_and_or_b32 v2, v10, v2, v0
-; GISEL-NEXT: v_and_or_b32 v0, v4, -1, 0
-; GISEL-NEXT: v_or3_b32 v1, v2, v1, 0
+; GISEL-NEXT: v_and_b32_e32 v1, 0x80000000, v6
+; GISEL-NEXT: v_mov_b32_e32 v2, 0x3ff00000
+; GISEL-NEXT: v_mov_b32_e32 v3, 0xfffff
+; GISEL-NEXT: v_lshl_add_u32 v2, v7, 20, v2
+; GISEL-NEXT: v_and_or_b32 v1, v10, v3, v1
+; GISEL-NEXT: v_or3_b32 v1, v1, v2, 0
; GISEL-NEXT: .LBB2_14: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1083,7 +1082,6 @@ define double @uitofp_i128_to_f64(i128 %x) {
; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff00000
; GISEL-NEXT: v_lshl_add_u32 v0, v6, 20, v0
; GISEL-NEXT: v_and_b32_e32 v1, 0xfffff, v9
-; GISEL-NEXT: v_and_or_b32 v4, v4, -1, 0
; GISEL-NEXT: v_or3_b32 v5, v1, v0, 0
; GISEL-NEXT: .LBB3_14: ; %Flow5
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]