[llvm] AMDGPU/GlobalISel: Run redundant_and combine in RegBankCombiner (PR #112353)

Petar Avramovic via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 15 09:19:34 PDT 2024


https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/112353

From 1a5ad8838d97eb48edc9a48bc2c4c38967f4fa2d Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Tue, 15 Oct 2024 14:47:29 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Run redundant_and combine in
 RegBankCombiner

The combine is needed to clear the redundant ANDs with 1 that
reg-bank-select inserts to clean up the high bits of a register.
Also fix replaceRegWith from CombinerHelper: if a copy has to be
inserted, create the copy first and only then delete MI. If MI is
deleted first, the insertion point is no longer valid.
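
For clarity, a minimal C++ sketch of the ordering problem (an
illustration only; the names mirror the CombinerHelper code changed
below):

  // The Builder's insertion point is tied to MI while a combine is
  // being applied.
  void applyCombineCopy(MachineInstr &MI) {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    // Wrong order: erasing MI first leaves replaceRegWith with a
    // dangling insertion point when it must fall back to building a
    // COPY:
    //   MI.eraseFromParent();
    //   replaceRegWith(MRI, DstReg, SrcReg);
    // Correct order: build the possible COPY while MI still exists,
    // then erase MI.
    replaceRegWith(MRI, DstReg, SrcReg);
    MI.eraseFromParent();
  }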
---
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  12 +-
 llvm/lib/Target/AMDGPU/AMDGPUCombine.td       |   3 +-
 .../GlobalISel/artifact-combiner-asserts.ll   |   4 +-
 .../regbankcombiner-redundant-and.mir         |  28 +++
 llvm/test/CodeGen/AMDGPU/fptoi.i128.ll        |  12 +-
 llvm/test/CodeGen/AMDGPU/itofp.i128.ll        | 166 +++++++++---------
 6 files changed, 125 insertions(+), 100 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir
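
For context, the generic redundant_and combine treats %dst = G_AND %x, %y
as redundant when known bits prove the mask cannot change %x: every bit
is either known zero in %x or known one in %y. A self-contained sketch of
that check (an illustration of the idea, not the verbatim upstream
helper):

  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // %dst = G_AND %x, %y can be replaced by %x when, for every bit,
  // either %x is known 0 (the AND result is 0 there anyway) or %y is
  // known 1 (the mask keeps %x's bit).
  static bool andIsRedundant(const KnownBits &X, const KnownBits &Y) {
    return (X.Zero | Y.One).isAllOnes();
  }

This is what the added MIR test exercises: G_ICMP defines 0 or 1, so the
G_AND with 1 is a no-op and the store can use the compare result (via a
copy, since the register attributes differ) directly.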

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 14e94d48bf8362..f9b1621955c217 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -178,7 +178,7 @@ void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
   if (MRI.constrainRegAttrs(ToReg, FromReg))
     MRI.replaceRegWith(FromReg, ToReg);
   else
-    Builder.buildCopy(ToReg, FromReg);
+    Builder.buildCopy(FromReg, ToReg);
 
   Observer.finishedChangingAllUsesOfReg();
 }
@@ -229,8 +229,8 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
   Register DstReg = MI.getOperand(0).getReg();
   Register SrcReg = MI.getOperand(1).getReg();
-  MI.eraseFromParent();
   replaceRegWith(MRI, DstReg, SrcReg);
+  MI.eraseFromParent();
 }
 
 bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
@@ -379,8 +379,8 @@ void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
     Builder.buildUndef(NewDstReg);
   else
     Builder.buildBuildVector(NewDstReg, Ops);
-  MI.eraseFromParent();
   replaceRegWith(MRI, DstReg, NewDstReg);
+  MI.eraseFromParent();
 }
 
 bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
@@ -559,8 +559,8 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
   else
     Builder.buildMergeLikeInstr(NewDstReg, Ops);
 
-  MI.eraseFromParent();
   replaceRegWith(MRI, DstReg, NewDstReg);
+  MI.eraseFromParent();
 }
 
 bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
@@ -2825,8 +2825,8 @@ void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
   Register OldReg = MI.getOperand(0).getReg();
   Register Replacement = MI.getOperand(OpIdx).getReg();
   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
-  MI.eraseFromParent();
   replaceRegWith(MRI, OldReg, Replacement);
+  MI.eraseFromParent();
 }
 
 void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
@@ -2834,8 +2834,8 @@ void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
   Register OldReg = MI.getOperand(0).getReg();
   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
-  MI.eraseFromParent();
   replaceRegWith(MRI, OldReg, Replacement);
+  MI.eraseFromParent();
 }
 
 bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index b2a3f9392157d1..985fa8f1deff94 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -169,5 +169,6 @@ def AMDGPURegBankCombiner : GICombiner<
   "AMDGPURegBankCombinerImpl",
   [unmerge_merge, unmerge_cst, unmerge_undef,
    zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
-   fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
+   fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
+   redundant_and]> {
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
index 6dce6c1852af9b..6e4fb2678b382d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll
@@ -27,10 +27,8 @@ define hidden <2 x i64> @icmp_v2i32_zext_to_v2i64(<2 x i32> %arg) {
 ; CHECK-NEXT:    v_mov_b32_e32 v3, 0
 ; CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
-; CHECK-NEXT:    v_and_b32_e32 v2, 1, v1
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %cmp = icmp eq <2 x i32> %arg, zeroinitializer
   %sext = zext <2 x i1> %cmp to <2 x i64>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir
new file mode 100644
index 00000000000000..f87a253dcb4333
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-redundant-and.mir
@@ -0,0 +1,28 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: replaceRegWith_requires_copy
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: replaceRegWith_requires_copy
+    ; CHECK: liveins: $sgpr0, $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY [[ICMP]](s32)
+    ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sgpr(p1) = COPY $vgpr0_vgpr1
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:sgpr(s32) = G_CONSTANT i32 1
+    %3:sreg_32(s32) = G_ICMP intpred(ne), %1, %2
+    %4:sgpr(s32) = G_AND %3, %2
+    G_STORE %4(s32), %0(p1) :: (store (s32), addrspace 1)
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
index 6e8e6c07217895..786fe03164690e 100644
--- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll
@@ -136,12 +136,12 @@ define i128 @fptosi_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_mov_b32_e32 v5, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 20, v5
-; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v0
+; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 20, v5
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff
 ; GISEL-NEXT:    s_mov_b64 s[4:5], 0
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v7, 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v2
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
@@ -508,12 +508,12 @@ define i128 @fptoui_f64_to_i128(double %x) {
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_mov_b32_e32 v5, v1
 ; GISEL-NEXT:    v_mov_b32_e32 v4, v0
-; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 20, v5
-; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v0
+; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 20, v5
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff
 ; GISEL-NEXT:    s_mov_b64 s[4:5], 0
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GISEL-NEXT:    v_mov_b32_e32 v7, 0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v6, 0x7ff, v2
 ; GISEL-NEXT:    v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
 ; GISEL-NEXT:    s_mov_b64 s[6:7], s[4:5]
 ; GISEL-NEXT:    v_mov_b32_e32 v0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
index 38d928a006fb20..2999ddb8315883 100644
--- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll
@@ -673,38 +673,38 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v6, v4
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v6, v5
-; GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_xor_b32_e32 v2, v6, v2
-; GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v3, v6, v3
-; GISEL-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v6, vcc
-; GISEL-NEXT:    v_ffbh_u32_e32 v5, v0
-; GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
-; GISEL-NEXT:    v_ffbh_u32_e32 v4, v1
-; GISEL-NEXT:    v_add_u32_e32 v5, 32, v5
-; GISEL-NEXT:    v_ffbh_u32_e32 v7, v2
-; GISEL-NEXT:    v_min_u32_e32 v4, v4, v5
-; GISEL-NEXT:    v_ffbh_u32_e32 v5, v3
+; GISEL-NEXT:    v_xor_b32_e32 v4, v6, v2
+; GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, v0, v6
+; GISEL-NEXT:    v_xor_b32_e32 v5, v6, v3
+; GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v6, vcc
+; GISEL-NEXT:    v_subb_co_u32_e32 v4, vcc, v4, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v1, v2
+; GISEL-NEXT:    v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
+; GISEL-NEXT:    v_ffbh_u32_e32 v0, v3
+; GISEL-NEXT:    v_add_u32_e32 v1, 32, v1
+; GISEL-NEXT:    v_ffbh_u32_e32 v7, v4
+; GISEL-NEXT:    v_min_u32_e32 v0, v0, v1
+; GISEL-NEXT:    v_ffbh_u32_e32 v1, v5
 ; GISEL-NEXT:    v_add_u32_e32 v7, 32, v7
-; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[2:3]
-; GISEL-NEXT:    v_add_u32_e32 v4, 64, v4
-; GISEL-NEXT:    v_min_u32_e32 v5, v5, v7
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, v5, v4, vcc
+; GISEL-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[4:5]
+; GISEL-NEXT:    v_add_u32_e32 v0, 64, v0
+; GISEL-NEXT:    v_min_u32_e32 v1, v1, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v9, v1, v0, vcc
 ; GISEL-NEXT:    v_sub_u32_e32 v8, 0x80, v9
 ; GISEL-NEXT:    v_sub_u32_e32 v7, 0x7f, v9
 ; GISEL-NEXT:    v_cmp_ge_i32_e32 vcc, 53, v8
 ; GISEL-NEXT:    ; implicit-def: $vgpr10
-; GISEL-NEXT:    ; implicit-def: $vgpr4_vgpr5
+; GISEL-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
 ; GISEL-NEXT:  ; %bb.2: ; %itofp-if-else
-; GISEL-NEXT:    v_add_u32_e32 v2, 0xffffffb5, v9
-; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v2, v[0:1]
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v2
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, 0, v0, vcc
+; GISEL-NEXT:    v_add_u32_e32 v4, 0xffffffb5, v9
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], v4, v[2:3]
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v1, vcc
 ; GISEL-NEXT:    ; implicit-def: $vgpr8
-; GISEL-NEXT:    ; implicit-def: $vgpr0
+; GISEL-NEXT:    ; implicit-def: $vgpr2
 ; GISEL-NEXT:    ; implicit-def: $vgpr9
 ; GISEL-NEXT:  ; %bb.3: ; %Flow3
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
@@ -721,89 +721,88 @@ define double @sitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:  ; %bb.6: ; %itofp-sw-default
 ; GISEL-NEXT:    v_sub_u32_e32 v14, 0x49, v9
 ; GISEL-NEXT:    v_sub_u32_e32 v10, 64, v14
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v14, v[0:1]
-; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v14, v[2:3]
+; GISEL-NEXT:    v_lshlrev_b64 v[10:11], v10, v[4:5]
 ; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v14
-; GISEL-NEXT:    v_or_b32_e32 v10, v4, v10
-; GISEL-NEXT:    v_or_b32_e32 v11, v5, v11
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v15, v[2:3]
-; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v14, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v14, v[4:5]
+; GISEL-NEXT:    v_or_b32_e32 v10, v0, v10
+; GISEL-NEXT:    v_or_b32_e32 v11, v1, v11
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v15, v[4:5]
 ; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
+; GISEL-NEXT:    v_add_u32_e32 v9, 55, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v11, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
-; GISEL-NEXT:    v_add_u32_e32 v14, 55, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v11, vcc
-; GISEL-NEXT:    v_sub_u32_e32 v11, 64, v14
-; GISEL-NEXT:    v_cndmask_b32_e64 v13, v4, v0, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v4, v5, v1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v5, 0, v12, vcc
-; GISEL-NEXT:    v_lshrrev_b64 v[9:10], v14, -1
-; GISEL-NEXT:    v_lshlrev_b64 v[11:12], v11, -1
-; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v14
-; GISEL-NEXT:    v_or_b32_e32 v16, v9, v11
-; GISEL-NEXT:    v_or_b32_e32 v17, v10, v12
-; GISEL-NEXT:    v_lshrrev_b64 v[11:12], v15, -1
-; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v14
-; GISEL-NEXT:    v_cndmask_b32_e32 v11, v11, v16, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v17, vcc
-; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v14
-; GISEL-NEXT:    v_cndmask_b32_e32 v9, 0, v9, vcc
-; GISEL-NEXT:    v_cndmask_b32_e32 v10, 0, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v11, v11, -1, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v12, v12, -1, s[4:5]
-; GISEL-NEXT:    v_and_b32_e32 v2, v9, v2
-; GISEL-NEXT:    v_and_b32_e32 v3, v10, v3
-; GISEL-NEXT:    v_and_or_b32 v0, v11, v0, v2
-; GISEL-NEXT:    v_and_or_b32 v1, v12, v1, v3
+; GISEL-NEXT:    v_cndmask_b32_e32 v11, 0, v12, vcc
+; GISEL-NEXT:    v_sub_u32_e32 v12, 64, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v14, v0, v2, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v10, v1, v3, s[4:5]
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], v9, -1
+; GISEL-NEXT:    v_lshlrev_b64 v[12:13], v12, -1
+; GISEL-NEXT:    v_subrev_u32_e32 v15, 64, v9
+; GISEL-NEXT:    v_or_b32_e32 v16, v0, v12
+; GISEL-NEXT:    v_or_b32_e32 v17, v1, v13
+; GISEL-NEXT:    v_lshrrev_b64 v[12:13], v15, -1
+; GISEL-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v12, v12, v16, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v13, v13, v17, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v9, v12, -1, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v12, v13, -1, s[4:5]
+; GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_and_or_b32 v0, v9, v2, v0
+; GISEL-NEXT:    v_and_or_b32 v1, v12, v3, v1
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
-; GISEL-NEXT:    v_or_b32_e32 v3, v13, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v3
-; GISEL-NEXT:    v_mov_b32_e32 v1, v4
-; GISEL-NEXT:    v_mov_b32_e32 v2, v5
-; GISEL-NEXT:    v_mov_b32_e32 v3, v6
+; GISEL-NEXT:    v_or_b32_e32 v9, v14, v0
+; GISEL-NEXT:    v_mov_b32_e32 v2, v9
+; GISEL-NEXT:    v_mov_b32_e32 v3, v10
+; GISEL-NEXT:    v_mov_b32_e32 v4, v11
+; GISEL-NEXT:    v_mov_b32_e32 v5, v12
 ; GISEL-NEXT:  .LBB2_7: ; %Flow1
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[12:13]
 ; GISEL-NEXT:  .LBB2_8: ; %Flow2
 ; GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
 ; GISEL-NEXT:    s_cbranch_execz .LBB2_10
 ; GISEL-NEXT:  ; %bb.9: ; %itofp-sw-bb
-; GISEL-NEXT:    v_lshlrev_b64 v[9:10], 1, v[0:1]
-; GISEL-NEXT:    v_lshlrev_b64 v[2:3], 1, v[2:3]
-; GISEL-NEXT:    v_lshrrev_b32_e32 v0, 31, v1
-; GISEL-NEXT:    v_or_b32_e32 v11, v2, v0
-; GISEL-NEXT:    v_mov_b32_e32 v0, v9
-; GISEL-NEXT:    v_mov_b32_e32 v1, v10
-; GISEL-NEXT:    v_mov_b32_e32 v2, v11
-; GISEL-NEXT:    v_mov_b32_e32 v3, v12
+; GISEL-NEXT:    v_lshlrev_b64 v[4:5], 1, v[4:5]
+; GISEL-NEXT:    v_lshlrev_b64 v[0:1], 1, v[2:3]
+; GISEL-NEXT:    v_lshrrev_b32_e32 v2, 31, v3
+; GISEL-NEXT:    v_or_b32_e32 v2, v4, v2
+; GISEL-NEXT:    v_mov_b32_e32 v5, v3
+; GISEL-NEXT:    v_mov_b32_e32 v4, v2
+; GISEL-NEXT:    v_mov_b32_e32 v3, v1
+; GISEL-NEXT:    v_mov_b32_e32 v2, v0
 ; GISEL-NEXT:  .LBB2_10: ; %itofp-sw-epilog
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
-; GISEL-NEXT:    v_bfe_u32 v3, v0, 2, 1
-; GISEL-NEXT:    v_or_b32_e32 v0, v0, v3
-; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
-; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GISEL-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 2, v[0:1]
+; GISEL-NEXT:    v_bfe_u32 v0, v2, 2, 1
+; GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
+; GISEL-NEXT:    v_add_co_u32_e32 v2, vcc, 1, v0
+; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GISEL-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 2, v[2:3]
 ; GISEL-NEXT:    v_mov_b32_e32 v9, 0
-; GISEL-NEXT:    v_and_b32_e32 v10, 0x800000, v1
+; GISEL-NEXT:    v_and_b32_e32 v10, 0x800000, v3
 ; GISEL-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[9:10]
-; GISEL-NEXT:    v_lshl_or_b32 v10, v2, 30, v5
+; GISEL-NEXT:    v_lshl_or_b32 v10, v4, 30, v1
 ; GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GISEL-NEXT:  ; %bb.11: ; %itofp-if-then20
-; GISEL-NEXT:    v_lshrrev_b64 v[4:5], 3, v[0:1]
+; GISEL-NEXT:    v_lshrrev_b64 v[0:1], 3, v[2:3]
 ; GISEL-NEXT:    v_mov_b32_e32 v7, v8
-; GISEL-NEXT:    v_lshl_or_b32 v10, v2, 29, v5
+; GISEL-NEXT:    v_lshl_or_b32 v10, v4, 29, v1
 ; GISEL-NEXT:  ; %bb.12: ; %Flow
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GISEL-NEXT:  .LBB2_13: ; %Flow4
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[8:9]
-; GISEL-NEXT:    v_and_b32_e32 v0, 0x80000000, v6
-; GISEL-NEXT:    v_mov_b32_e32 v1, 0x3ff00000
-; GISEL-NEXT:    v_mov_b32_e32 v2, 0xfffff
-; GISEL-NEXT:    v_lshl_add_u32 v1, v7, 20, v1
-; GISEL-NEXT:    v_and_or_b32 v2, v10, v2, v0
-; GISEL-NEXT:    v_and_or_b32 v0, v4, -1, 0
-; GISEL-NEXT:    v_or3_b32 v1, v2, v1, 0
+; GISEL-NEXT:    v_and_b32_e32 v1, 0x80000000, v6
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0x3ff00000
+; GISEL-NEXT:    v_mov_b32_e32 v3, 0xfffff
+; GISEL-NEXT:    v_lshl_add_u32 v2, v7, 20, v2
+; GISEL-NEXT:    v_and_or_b32 v1, v10, v3, v1
+; GISEL-NEXT:    v_or3_b32 v1, v1, v2, 0
 ; GISEL-NEXT:  .LBB2_14: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -1083,7 +1082,6 @@ define double @uitofp_i128_to_f64(i128 %x) {
 ; GISEL-NEXT:    v_mov_b32_e32 v0, 0x3ff00000
 ; GISEL-NEXT:    v_lshl_add_u32 v0, v6, 20, v0
 ; GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff, v9
-; GISEL-NEXT:    v_and_or_b32 v4, v4, -1, 0
 ; GISEL-NEXT:    v_or3_b32 v5, v1, v0, 0
 ; GISEL-NEXT:  .LBB3_14: ; %Flow5
 ; GISEL-NEXT:    s_or_b64 exec, exec, s[6:7]


