[llvm] [CodeGen] Remove redundant instruction using MachineLateInstrsCleanup (PR #139716)

Rohit Aggarwal via llvm-commits <llvm-commits at lists.llvm.org>
Mon May 19 06:53:54 PDT 2025


https://github.com/rohitaggarwal007 updated https://github.com/llvm/llvm-project/pull/139716

From 214caf495e9198ba853da5a89dfafda1576808a2 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 13 May 2025 17:20:55 +0530
Subject: [PATCH 1/7] Remove redundant pseudo mov instruction

---
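A short summary of the functional change: isCandidate() in
MachineLateInstrsCleanup previously bailed out on any instruction that has a
register operand beyond its single explicit def, so move-immediate
instructions carrying an extra implicit operand (such as a flags clobber on
x86 zero idioms) were never treated as candidates. With this patch the loop
records the one explicit, non-dead def and then accepts a move-immediate even
if further register operands follow, which lets the pass delete a later
identical re-materialization of the same constant. The sketch below is
illustrative only, assuming the usual LLVM CodeGen headers; isCandidateSketch
is not a function added by the patch, and the immediate/CPI operand
validation of the real loop is elided:

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineOperand.h"
  #include "llvm/CodeGen/Register.h"
  using namespace llvm;

  // Sketch of the operand walk in isCandidate() after this patch.
  static bool isCandidateSketch(const MachineInstr *MI, Register &DefedReg,
                                Register FrameReg) {
    DefedReg = MCRegister::NoRegister;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (!MO.isReg())
        continue; // the real code also validates immediate-like operands here
      if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
        // Accept exactly one explicit, non-dead def, and only as operand 0.
        if (i == 0 && !MO.isImplicit() && !MO.isDead())
          DefedReg = MO.getReg();
        else
          return false;
      } else if (MI->isMoveImmediate()) {
        // New: once the def is known, extra register operands (e.g. an
        // implicit status-register def) no longer disqualify a move-immediate.
        return DefedReg.isValid();
      } else if (MO.getReg() && MO.getReg() != FrameReg) {
        return false;
      }
    }
    return DefedReg.isValid();
  }
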
 llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp |   4 +-
 .../implicit-def-subreg-to-reg-regression.ll  |   1 -
 llvm/test/CodeGen/AMDGPU/call-waitcnt.ll      |   1 -
 .../CodeGen/AMDGPU/captured-frame-index.ll    |   3 +-
 .../AMDGPU/cgp-addressing-modes-gfx1030.ll    |   1 -
 .../AMDGPU/cgp-addressing-modes-gfx908.ll     |   1 -
 .../CodeGen/AMDGPU/cgp-addressing-modes.ll    |   3 +-
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll        |  12 -
 ...frame-setup-without-sgpr-to-vgpr-spills.ll |   2 -
 .../AMDGPU/gfx-callable-return-types.ll       |  28 +--
 .../local-stack-alloc-block-sp-reference.ll   |   2 -
 .../AMDGPU/preserve-wwm-copy-dst-reg.ll       |   2 -
 .../AMDGPU/required-export-priority.ll        |   1 -
 llvm/test/CodeGen/AMDGPU/sibling-call.ll      |  15 +-
 .../CodeGen/AMDGPU/spill-scavenge-offset.ll   |   8 -
 .../CodeGen/X86/2007-11-30-LoadFolding-Bug.ll |   1 -
 .../CodeGen/X86/AMX/amx-ldtilecfg-insert.ll   |   2 -
 llvm/test/CodeGen/X86/avx-load-store.ll       |   1 -
 llvm/test/CodeGen/X86/avx512-i1test.ll        |  14 +-
 llvm/test/CodeGen/X86/isel-brcond-fcmp.ll     | 106 +++++++-
 llvm/test/CodeGen/X86/isel-brcond-icmp.ll     | 232 +++++-------------
 llvm/test/CodeGen/X86/pr36602.ll              |   1 -
 llvm/test/CodeGen/X86/pr38795.ll              |   1 -
 llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |   1 -
 .../CodeGen/X86/scheduler-backtracking.ll     |   5 -
 llvm/test/CodeGen/X86/tail-opts.ll            |   5 -
 .../vector-shuffle-combining-avx512bwvl.ll    |   6 +-
 llvm/test/CodeGen/X86/x86-cmov-converter.ll   |   2 -
 llvm/test/CodeGen/X86/zext-extract_subreg.ll  |   3 -
 ...dgpu_generated_funcs.ll.generated.expected |   1 -
 ...pu_generated_funcs.ll.nogenerated.expected |   1 -
 .../x86_generated_funcs.ll.generated.expected |   1 -
 ...86_generated_funcs.ll.nogenerated.expected |   1 -
 33 files changed, 183 insertions(+), 285 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c8c8ed99d93ea..c3b6115338f05 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -186,11 +186,13 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg()) {
-      if (MO.isDef()) {
+      if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
         if (i == 0 && !MO.isImplicit() && !MO.isDead())
           DefedReg = MO.getReg();
         else
           return false;
+      } else if (MI->isPseudo() && MI->isMoveImmediate()) {
+        return DefedReg.isValid();
       } else if (MO.getReg() && MO.getReg() != FrameReg)
         return false;
     } else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed9052..08346f1a857eb 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -80,7 +80,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
 ; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
-; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    stp xzr, xzr, [sp]
 ; CHECK-NEXT:    stp x8, xzr, [sp, #16]
 ; CHECK-NEXT:    bl _fprintf
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 9abb50651146a..6889110752ae0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -41,7 +41,6 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    global_store_dword v0, v0, s[6:7]
 ; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; GCN-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 963b3a55259fa..852ffd5fa7183 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -113,8 +113,7 @@ define amdgpu_kernel void @stored_fi_to_fi() #0 {
 
 ; GCN-LABEL: {{^}}stored_fi_to_global:
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[FI]]
+; GCN: buffer_store_dword v{{[0-9]+}}
 define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
   %tmp = alloca float, addrspace(5)
   store float 0.0, ptr  addrspace(5) %tmp
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 81f768f303ca1..98771dcb441cc 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -31,7 +31,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
 ; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v1
 ; GCN-NEXT:    s_cbranch_execz .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %if
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 2
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index 49370e2fbf1b6..d212d7d52d841 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -33,7 +33,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_cbranch_execz .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %if
-; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 2.0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    global_atomic_add_f32 v0, v1, s[2:3] offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 0f9407b77aa83..8f92ee42c066a 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -72,8 +72,7 @@ done:
 ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
 ; GCN: s_and_saveexec_b64
 ; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
-; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+; GFX9: global_load_sbyte {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
 ; GCN: {{^}}.LBB2_2:
 ; GCN: s_or_b64 exec
 define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..a4ae669617263 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -323,8 +323,6 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v36, vcc, 64, v30
 ; SDAG-NEXT:    v_lshr_b64 v[37:38], v[6:7], v30
 ; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v29
-; SDAG-NEXT:    v_mov_b32_e32 v12, 0
-; SDAG-NEXT:    v_mov_b32_e32 v13, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v14, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v15, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -1107,8 +1105,6 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v28, vcc, 64, v22
 ; SDAG-NEXT:    v_lshr_b64 v[29:30], v[6:7], v22
 ; SDAG-NEXT:    v_add_i32_e32 v26, vcc, -1, v12
-; SDAG-NEXT:    v_mov_b32_e32 v20, 0
-; SDAG-NEXT:    v_mov_b32_e32 v21, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v10, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v11, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -1679,8 +1675,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v37, vcc, 64, v32
 ; SDAG-NEXT:    v_lshr_b64 v[24:25], v[0:1], v32
 ; SDAG-NEXT:    v_add_i32_e32 v36, vcc, -1, v31
-; SDAG-NEXT:    v_mov_b32_e32 v18, 0
-; SDAG-NEXT:    v_mov_b32_e32 v19, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v22, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v23, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -1874,8 +1868,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v51, vcc, 64, v38
 ; SDAG-NEXT:    v_lshr_b64 v[22:23], v[4:5], v38
 ; SDAG-NEXT:    v_add_i32_e32 v50, vcc, -1, v37
-; SDAG-NEXT:    v_mov_b32_e32 v18, 0
-; SDAG-NEXT:    v_mov_b32_e32 v19, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v20, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v21, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -2562,8 +2554,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v35, vcc, 64, v30
 ; SDAG-NEXT:    v_lshr_b64 v[26:27], v[2:3], v30
 ; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v8
-; SDAG-NEXT:    v_mov_b32_e32 v20, 0
-; SDAG-NEXT:    v_mov_b32_e32 v21, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v24, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v25, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -2737,8 +2727,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v39, vcc, 64, v34
 ; SDAG-NEXT:    v_lshr_b64 v[26:27], v[6:7], v34
 ; SDAG-NEXT:    v_add_i32_e32 v38, vcc, -1, v12
-; SDAG-NEXT:    v_mov_b32_e32 v22, 0
-; SDAG-NEXT:    v_mov_b32_e32 v23, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v24, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v25, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 13884eb788d8a..a3c7eb8c56fb0 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -53,7 +53,6 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    v_writelane_b32 v0, s31, 0
@@ -77,7 +76,6 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 6384fdba7a45a..e8aa64f0b8f15 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3234,20 +3234,20 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX11-NEXT:    v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
-; GFX11-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
-; GFX11-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
-; GFX11-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
-; GFX11-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
-; GFX11-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
-; GFX11-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
-; GFX11-NEXT:    v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
-; GFX11-NEXT:    v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
-; GFX11-NEXT:    v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
-; GFX11-NEXT:    v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
-; GFX11-NEXT:    v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
-; GFX11-NEXT:    v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
-; GFX11-NEXT:    v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
+; GFX11-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
+; GFX11-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
+; GFX11-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 0
+; GFX11-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 0
+; GFX11-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 0
+; GFX11-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 0
+; GFX11-NEXT:    v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 0
+; GFX11-NEXT:    v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v22, 0
+; GFX11-NEXT:    v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v24, 0
+; GFX11-NEXT:    v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v26, 0
+; GFX11-NEXT:    v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v28, 0
+; GFX11-NEXT:    v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT:    v_mov_b32_e32 v31, 0
 ; GFX11-NEXT:    s_mov_b32 s1, return_72xi32@abs32@hi
 ; GFX11-NEXT:    s_mov_b32 s0, return_72xi32@abs32@lo
 ; GFX11-NEXT:    v_writelane_b32 v60, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index a3ebaec4811a9..97b7e26d1230b 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -225,12 +225,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; MUBUF-NEXT:  ; %bb.2: ; %split
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x4000
 ; MUBUF-NEXT:    v_or_b32_e32 v0, 0x12d4, v1
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x4000
 ; MUBUF-NEXT:    s_movk_i32 s4, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v5, v0, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    v_or_b32_e32 v0, 0x12d0, v1
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x4000
 ; MUBUF-NEXT:    s_or_b32 s4, s4, 0x12c0
 ; MUBUF-NEXT:    buffer_load_dword v4, v0, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 2d95ec6f699dc..3d6e7c532348f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -395,7 +395,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[16:17]
-; GFX908-NEXT:    s_mov_b64 s[16:17], exec
 ; GFX908-NEXT:    s_mov_b64 exec, 1
 ; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    v_writelane_b32 v2, s31, 0
@@ -743,7 +742,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[4:5]
-; GFX908-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX908-NEXT:    s_mov_b64 exec, 1
 ; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index 4aa1ddee2efe3..e145380dca59e 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -267,7 +267,6 @@ define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 16, v2, vcc_lo
-; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    scratch_store_b32 v0, v1, off
 ; GCN-NEXT:    scratch_load_b32 v0, off, off
 ; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 35234236b848f..ddb7d6b9c3936 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -388,11 +388,7 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
 ; GCN-NEXT:    s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12
 ; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_writelane_b32 v40, s30, 0
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
@@ -423,6 +419,9 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
 ; GCN-NEXT:    v_mov_b32_e32 v29, 0
 ; GCN-NEXT:    v_mov_b32_e32 v30, 0
 ; GCN-NEXT:    v_writelane_b32 v40, s31, 1
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GCN-NEXT:    v_readlane_b32 s31, v40, 1
@@ -528,10 +527,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
 ; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:48
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
-; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0
@@ -560,6 +555,9 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
 ; GCN-NEXT:    v_mov_b32_e32 v28, 0
 ; GCN-NEXT:    v_mov_b32_e32 v29, 0
 ; GCN-NEXT:    v_mov_b32_e32 v30, 0
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[4:5]
 entry:
@@ -928,7 +926,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
 ; GCN-NEXT:    s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
-; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 50056b62b3397..e9aebeef6ea6d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -9971,7 +9971,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s34, 0x80c00
@@ -9989,7 +9988,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10007,7 +10005,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s34, 0x81400
@@ -10025,7 +10022,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10043,7 +10039,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s34, 0x81c00
@@ -10061,7 +10056,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
-; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10105,7 +10099,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[34:35]
-; GFX6-NEXT:    s_mov_b64 s[34:35], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s44, 0x82c00
@@ -10165,7 +10158,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX6-NEXT:    s_mov_b64 s[36:37], s[0:1]
-; GFX6-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s6, 0x80800
diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 8d690ba06e3bd..ac88ed8d80ff8 100644
--- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -35,7 +35,6 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
 ; CHECK-NEXT:    andl $1, %ebp
 ; CHECK-NEXT:    xorpd %xmm0, %xmm0
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_7: # %bb.i28.i
diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
index 06cf968512db8..2f6e2eae85c25 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
@@ -174,7 +174,6 @@ define dso_local void @test4(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-NEXT:    incl %edi
 ; CHECK-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    ldtilecfg -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB3_4
 ; CHECK-NEXT:  .LBB3_2: # %amx2
@@ -190,7 +189,6 @@ define dso_local void @test4(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-NEXT:    decl %edi
 ; CHECK-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    ldtilecfg -{{[0-9]+}}(%rsp)
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB3_2
 ; CHECK-NEXT:  .LBB3_4: # %amx1
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 3f856d33145d8..ef7edf700ca9e 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -216,7 +216,6 @@ define void @f_f() nounwind {
 ; CHECK-NEXT:    jne .LBB9_2
 ; CHECK-NEXT:  # %bb.1: # %cif_mask_all
 ; CHECK-NEXT:  .LBB9_2: # %cif_mask_mixed
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB9_4
 ; CHECK-NEXT:  # %bb.3: # %cif_mixed_test_all
diff --git a/llvm/test/CodeGen/X86/avx512-i1test.ll b/llvm/test/CodeGen/X86/avx512-i1test.ll
index d43f05bbd5a1d..c5d4c87d66da2 100644
--- a/llvm/test/CodeGen/X86/avx512-i1test.ll
+++ b/llvm/test/CodeGen/X86/avx512-i1test.ll
@@ -14,13 +14,13 @@ define void @func() {
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_1: # %bb33
-; CHECK-NEXT:   # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    testb   %al, %al
-; CHECK-NEXT:    jne     .LBB0_1
-; CHECK-NEXT:  # %bb.2:                                # %bb35
-; CHECK-NEXT:  #   in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    testb   %al, %al
-; CHECK-NEXT:    jmp     .LBB0_1
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %bb35
+; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jmp .LBB0_1
 bb1:
   br i1 poison, label %L_10, label %L_10
 
diff --git a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
index 5a28e094f8a3c..4653c496ca051 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
@@ -531,21 +531,19 @@ bb1:
 define i32 @fcmp_ogt1(float %x) {
 ; SDAG-X64-LABEL: fcmp_ogt1:
 ; SDAG-X64:       ## %bb.0:
-; SDAG-X64-NEXT:    xorl    %eax, %eax
-; SDAG-X64-NEXT:    testb   %al, %al
-; SDAG-X64-NEXT:    je      LBB16_1
-; SDAG-X64-NEXT:  ## %bb.2: ## %bb1
-; SDAG-X64-NEXT:    xorl    %eax, %eax
-; SDAG-X64-NEXT:    retq
-; SDAG-X64-NEXT:  LBB16_1: ## %bb2
-; SDAG-X64-NEXT:    movl    $1, %eax
+; SDAG-X64-NEXT:    xorl %eax, %eax
+; SDAG-X64-NEXT:    testb %al, %al
+; SDAG-X64-NEXT:    jne LBB16_2
+; SDAG-X64-NEXT:  ## %bb.1: ## %bb2
+; SDAG-X64-NEXT:    movl $1, %eax
+; SDAG-X64-NEXT:  LBB16_2: ## %bb1
 ; SDAG-X64-NEXT:    retq
-
+;
 ; FASTISEL-X64-LABEL: fcmp_ogt1:
 ; FASTISEL-X64:       ## %bb.0:
-; FASTISEL-X64:         movl    $1, %eax
-; FASTISEL-X64:         retq
-
+; FASTISEL-X64-NEXT:    movl $1, %eax
+; FASTISEL-X64-NEXT:    retq
+;
 ; GISEL-X64-LABEL: fcmp_ogt1:
 ; GISEL-X64:       ## %bb.0:
 ; GISEL-X64-NEXT:    ucomiss %xmm0, %xmm0
@@ -558,6 +556,8 @@ define i32 @fcmp_ogt1(float %x) {
 ; GISEL-X64-NEXT:  LBB16_1: ## %bb2
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
+
+
   %1 = fcmp ogt float %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -667,6 +667,21 @@ bb1:
 }
 
 define i32 @fcmp_olt1(float %x) {
+; SDAG-X64-LABEL: fcmp_olt1:
+; SDAG-X64:       ## %bb.0:
+; SDAG-X64-NEXT:    xorl %eax, %eax
+; SDAG-X64-NEXT:    testb %al, %al
+; SDAG-X64-NEXT:    jne LBB20_2
+; SDAG-X64-NEXT:  ## %bb.1: ## %bb2
+; SDAG-X64-NEXT:    movl $1, %eax
+; SDAG-X64-NEXT:  LBB20_2: ## %bb1
+; SDAG-X64-NEXT:    retq
+;
+; FASTISEL-X64-LABEL: fcmp_olt1:
+; FASTISEL-X64:       ## %bb.0:
+; FASTISEL-X64-NEXT:    movl $1, %eax
+; FASTISEL-X64-NEXT:    retq
+;
 ; GISEL-X64-LABEL: fcmp_olt1:
 ; GISEL-X64:       ## %bb.0:
 ; GISEL-X64-NEXT:    ucomiss %xmm0, %xmm0
@@ -788,6 +803,22 @@ bb1:
 }
 
 define i32 @fcmp_one1(float %x) {
+; SDAG-X64-LABEL: fcmp_one1:
+; SDAG-X64:       ## %bb.0:
+; SDAG-X64-NEXT:    xorl %eax, %eax
+; SDAG-X64-NEXT:    testb %al, %al
+; SDAG-X64-NEXT:    je LBB24_1
+; SDAG-X64-NEXT:  ## %bb.2: ## %bb1
+; SDAG-X64-NEXT:    retq
+; SDAG-X64-NEXT:  LBB24_1: ## %bb2
+; SDAG-X64-NEXT:    movl $1, %eax
+; SDAG-X64-NEXT:    retq
+;
+; FASTISEL-X64-LABEL: fcmp_one1:
+; FASTISEL-X64:       ## %bb.0:
+; FASTISEL-X64-NEXT:    movl $1, %eax
+; FASTISEL-X64-NEXT:    retq
+;
 ; GISEL-X64-LABEL: fcmp_one1:
 ; GISEL-X64:       ## %bb.0:
 ; GISEL-X64-NEXT:    ucomiss %xmm0, %xmm0
@@ -973,6 +1004,23 @@ bb1:
 }
 
 define i32 @fcmp_ueq1(float %x) {
+; SDAG-X64-LABEL: fcmp_ueq1:
+; SDAG-X64:       ## %bb.0:
+; SDAG-X64-NEXT:    movb $1, %al
+; SDAG-X64-NEXT:    testb %al, %al
+; SDAG-X64-NEXT:    jne LBB30_2
+; SDAG-X64-NEXT:  ## %bb.1: ## %bb2
+; SDAG-X64-NEXT:    movl $1, %eax
+; SDAG-X64-NEXT:    retq
+; SDAG-X64-NEXT:  LBB30_2: ## %bb1
+; SDAG-X64-NEXT:    xorl %eax, %eax
+; SDAG-X64-NEXT:    retq
+;
+; FASTISEL-X64-LABEL: fcmp_ueq1:
+; FASTISEL-X64:       ## %bb.0:
+; FASTISEL-X64-NEXT:    xorl %eax, %eax
+; FASTISEL-X64-NEXT:    retq
+;
 ; GISEL-X64-LABEL: fcmp_ueq1:
 ; GISEL-X64:       ## %bb.0:
 ; GISEL-X64-NEXT:    ucomiss %xmm0, %xmm0
@@ -1094,6 +1142,23 @@ bb1:
 }
 
 define i32 @fcmp_uge1(float %x) {
+; SDAG-X64-LABEL: fcmp_uge1:
+; SDAG-X64:       ## %bb.0:
+; SDAG-X64-NEXT:    movb $1, %al
+; SDAG-X64-NEXT:    testb %al, %al
+; SDAG-X64-NEXT:    je LBB34_1
+; SDAG-X64-NEXT:  ## %bb.2: ## %bb1
+; SDAG-X64-NEXT:    xorl %eax, %eax
+; SDAG-X64-NEXT:    retq
+; SDAG-X64-NEXT:  LBB34_1: ## %bb2
+; SDAG-X64-NEXT:    movl $1, %eax
+; SDAG-X64-NEXT:    retq
+;
+; FASTISEL-X64-LABEL: fcmp_uge1:
+; FASTISEL-X64:       ## %bb.0:
+; FASTISEL-X64-NEXT:    xorl %eax, %eax
+; FASTISEL-X64-NEXT:    retq
+;
 ; GISEL-X64-LABEL: fcmp_uge1:
 ; GISEL-X64:       ## %bb.0:
 ; GISEL-X64-NEXT:    ucomiss %xmm0, %xmm0
@@ -1215,6 +1280,23 @@ bb1:
 }
 
 define i32 @fcmp_ule1(float %x) {
+; SDAG-X64-LABEL: fcmp_ule1:
+; SDAG-X64:       ## %bb.0:
+; SDAG-X64-NEXT:    movb $1, %al
+; SDAG-X64-NEXT:    testb %al, %al
+; SDAG-X64-NEXT:    je LBB38_1
+; SDAG-X64-NEXT:  ## %bb.2: ## %bb1
+; SDAG-X64-NEXT:    xorl %eax, %eax
+; SDAG-X64-NEXT:    retq
+; SDAG-X64-NEXT:  LBB38_1: ## %bb2
+; SDAG-X64-NEXT:    movl $1, %eax
+; SDAG-X64-NEXT:    retq
+;
+; FASTISEL-X64-LABEL: fcmp_ule1:
+; FASTISEL-X64:       ## %bb.0:
+; FASTISEL-X64-NEXT:    xorl %eax, %eax
+; FASTISEL-X64-NEXT:    retq
+;
 ; GISEL-X64-LABEL: fcmp_ule1:
 ; GISEL-X64:       ## %bb.0:
 ; GISEL-X64-NEXT:    ucomiss %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
index 59a45d9d72f5b..869cae6ac8f65 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
@@ -31,18 +31,6 @@ define i32 @icmp_eq_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_eq_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jne LBB0_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB0_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_eq_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -89,18 +77,6 @@ define i32 @icmp_ne_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_ne_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    je LBB1_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB1_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_ne_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -147,18 +123,6 @@ define i32 @icmp_ugt_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_ugt_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jbe LBB2_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB2_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_ugt_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -205,18 +169,6 @@ define i32 @icmp_uge_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_uge_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jb LBB3_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB3_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_uge_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -263,18 +215,6 @@ define i32 @icmp_ult_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_ult_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jae LBB4_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB4_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_ult_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -321,18 +261,6 @@ define i32 @icmp_ule_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_ule_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    ja LBB5_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB5_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_ule_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -379,18 +307,6 @@ define i32 @icmp_sgt_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_sgt_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jle LBB6_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB6_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_sgt_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -437,18 +353,6 @@ define i32 @icmp_sge_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_sge_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jl LBB7_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB7_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_sge_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -495,18 +399,6 @@ define i32 @icmp_slt_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_slt_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jge LBB8_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB8_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_slt_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -553,18 +445,6 @@ define i32 @icmp_sle_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-; X86-LABEL: icmp_sle_2:
-; X86:       ## %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT:    jg LBB9_1
-; X86-NEXT:  ## %bb.2: ## %bb1
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    retl
-; X86-NEXT:  LBB9_1: ## %bb2
-; X86-NEXT:    movl $1, %eax
-; X86-NEXT:    retl
-;
 ; GISEL-X86-LABEL: icmp_sle_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -594,15 +474,15 @@ define i32 @icmp_eq(i32 %x) {
 ; SDAG-NEXT:    je LBB10_1
 ; SDAG-NEXT:  ## %bb.2: ## %bb1
 ; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ; SDAG-NEXT:  LBB10_1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_eq:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    xorl %eax, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_eq:
 ; GISEL-X64:       ## %bb.0:
@@ -643,18 +523,16 @@ define i32 @icmp_ne(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-; SDAG-NEXT:    je LBB11_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
-; SDAG-NEXT:  LBB11_1: ## %bb2
+; SDAG-NEXT:    jne LBB11_2
+; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:  LBB11_2: ## %bb1
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_ne:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    movl $1, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_ne:
 ; GISEL-X64:       ## %bb.0:
@@ -695,18 +573,16 @@ define i32 @icmp_ugt(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-; SDAG-NEXT:    je LBB12_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
-; SDAG-NEXT:  LBB12_1: ## %bb2
+; SDAG-NEXT:    jne LBB12_2
+; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:  LBB12_2: ## %bb1
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_ugt:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    movl $1, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_ugt:
 ; GISEL-X64:       ## %bb.0:
@@ -750,15 +626,15 @@ define i32 @icmp_uge(i32 %x) {
 ; SDAG-NEXT:    je LBB13_1
 ; SDAG-NEXT:  ## %bb.2: ## %bb1
 ; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ; SDAG-NEXT:  LBB13_1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
-; FASTISEL-X64-LABEL: icmp_uge:
-; FASTISEL-X64:       ## %bb.0:
-; FASTISEL-X64-NEXT:    xorl %eax, %eax
-; FASTISEL-X64-NEXT:    retq
+; FASTISEL-LABEL: icmp_uge:
+; FASTISEL:       ## %bb.0:
+; FASTISEL-NEXT:    xorl %eax, %eax
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_uge:
 ; GISEL-X64:       ## %bb.0:
@@ -786,6 +662,10 @@ define i32 @icmp_uge(i32 %x) {
 ; GISEL-X86-NEXT:  LBB13_1: ## %bb2
 ; GISEL-X86-NEXT:    movl $1, %eax
 ; GISEL-X86-NEXT:    retl
+; FASTISEL-X64-LABEL: icmp_uge:
+; FASTISEL-X64:       ## %bb.0:
+; FASTISEL-X64-NEXT:    xorl %eax, %eax
+; FASTISEL-X64-NEXT:    retq
   %1 = icmp uge i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -799,18 +679,16 @@ define i32 @icmp_ult(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-; SDAG-NEXT:    je LBB14_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
-; SDAG-NEXT:  LBB14_1: ## %bb2
+; SDAG-NEXT:    jne LBB14_2
+; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:  LBB14_2: ## %bb1
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
-; FASTISEL-X64-LABEL: icmp_ult:
-; FASTISEL-X64:       ## %bb.0:
-; FASTISEL-X64-NEXT:    movl $1, %eax
-; FASTISEL-X64-NEXT:    ret{{q|l}}
+; FASTISEL-LABEL: icmp_ult:
+; FASTISEL:       ## %bb.0:
+; FASTISEL-NEXT:    movl $1, %eax
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_ult:
 ; GISEL-X64:       ## %bb.0:
@@ -838,6 +716,10 @@ define i32 @icmp_ult(i32 %x) {
 ; GISEL-X86-NEXT:  LBB14_1: ## %bb2
 ; GISEL-X86-NEXT:    movl $1, %eax
 ; GISEL-X86-NEXT:    retl
+; FASTISEL-X64-LABEL: icmp_ult:
+; FASTISEL-X64:       ## %bb.0:
+; FASTISEL-X64-NEXT:    movl $1, %eax
+; FASTISEL-X64-NEXT:    ret{{q|l}}
   %1 = icmp ult i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -854,15 +736,15 @@ define i32 @icmp_ule(i32 %x) {
 ; SDAG-NEXT:    je LBB15_1
 ; SDAG-NEXT:  ## %bb.2: ## %bb1
 ; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ; SDAG-NEXT:  LBB15_1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_ule:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    xorl %eax, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_ule:
 ; GISEL-X64:       ## %bb.0:
@@ -903,18 +785,16 @@ define i32 @icmp_sgt(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-; SDAG-NEXT:    je LBB16_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
-; SDAG-NEXT:  LBB16_1: ## %bb2
+; SDAG-NEXT:    jne LBB16_2
+; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:  LBB16_2: ## %bb1
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_sgt:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    movl $1, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_sgt:
 ; GISEL-X64:       ## %bb.0:
@@ -958,15 +838,15 @@ define i32 @icmp_sge(i32 %x) {
 ; SDAG-NEXT:    je LBB17_1
 ; SDAG-NEXT:  ## %bb.2: ## %bb1
 ; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ; SDAG-NEXT:  LBB17_1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_sge:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    xorl %eax, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_sge:
 ; GISEL-X64:       ## %bb.0:
@@ -1007,18 +887,16 @@ define i32 @icmp_slt(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-; SDAG-NEXT:    je LBB18_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
-; SDAG-NEXT:  LBB18_1: ## %bb2
+; SDAG-NEXT:    jne LBB18_2
+; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:  LBB18_2: ## %bb1
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_slt:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    movl $1, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_slt:
 ; GISEL-X64:       ## %bb.0:
@@ -1062,15 +940,15 @@ define i32 @icmp_sle(i32 %x) {
 ; SDAG-NEXT:    je LBB19_1
 ; SDAG-NEXT:  ## %bb.2: ## %bb1
 ; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ; SDAG-NEXT:  LBB19_1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
-; SDAG-NEXT:    ret{{q|l}}
+; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_sle:
 ; FASTISEL:       ## %bb.0:
 ; FASTISEL-NEXT:    xorl %eax, %eax
-; FASTISEL-NEXT:    ret{{q|l}}
+; FASTISEL-NEXT:    ret{{[l|q]}}
 ;
 ; GISEL-X64-LABEL: icmp_sle:
 ; GISEL-X64:       ## %bb.0:
@@ -1105,3 +983,5 @@ bb2:
 bb1:
   ret i32 0
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X86: {{.*}}
diff --git a/llvm/test/CodeGen/X86/pr36602.ll b/llvm/test/CodeGen/X86/pr36602.ll
index fa2e05e863336..fe9490efb838c 100644
--- a/llvm/test/CodeGen/X86/pr36602.ll
+++ b/llvm/test/CodeGen/X86/pr36602.ll
@@ -9,7 +9,6 @@ define i32 @fn2() {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %bb1
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB0_2: # %bb2
 ; CHECK-NEXT:    movl $1, %eax
diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index c3c96e8228797..c7b464cbe8a92 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -128,7 +128,6 @@ define dso_local void @fn() {
 ; CHECK-NEXT:    jne .LBB0_15
 ; CHECK-NEXT:  # %bb.14: # %if.then31
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    xorl %ecx, %ecx
 ; CHECK-NEXT:    xorl %ebp, %ebp
 ; CHECK-NEXT:    jmp .LBB0_15
 ; CHECK-NEXT:    .p2align 4
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c24823538aa14..2f605de07040f 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -288,7 +288,6 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_54
 ; CHECK-NEXT:  ## %bb.50: ## %for.body1664.lr.ph
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 426587a84ce17..cfe1e099e0e0c 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -754,7 +754,6 @@ define i256 @PR25498(i256 %a) nounwind {
 ; ILP-NEXT:    testq %r11, %r11
 ; ILP-NEXT:    cmovneq %rdx, %rcx
 ; ILP-NEXT:    orq $128, %rcx
-; ILP-NEXT:    xorl %edi, %edi
 ; ILP-NEXT:    orq %r10, %r9
 ; ILP-NEXT:    cmovneq %rsi, %rcx
 ; ILP-NEXT:    jmp .LBB4_3
@@ -803,7 +802,6 @@ define i256 @PR25498(i256 %a) nounwind {
 ; HYBRID-NEXT:    orq $128, %rcx
 ; HYBRID-NEXT:    orq %r10, %r9
 ; HYBRID-NEXT:    cmovneq %rdx, %rcx
-; HYBRID-NEXT:    xorl %edi, %edi
 ; HYBRID-NEXT:    jmp .LBB4_3
 ; HYBRID-NEXT:  .LBB4_1:
 ; HYBRID-NEXT:    movl $256, %ecx # imm = 0x100
@@ -850,7 +848,6 @@ define i256 @PR25498(i256 %a) nounwind {
 ; BURR-NEXT:    orq $128, %rcx
 ; BURR-NEXT:    orq %r10, %r9
 ; BURR-NEXT:    cmovneq %rdx, %rcx
-; BURR-NEXT:    xorl %edi, %edi
 ; BURR-NEXT:    jmp .LBB4_3
 ; BURR-NEXT:  .LBB4_1:
 ; BURR-NEXT:    movl $256, %ecx # imm = 0x100
@@ -897,7 +894,6 @@ define i256 @PR25498(i256 %a) nounwind {
 ; SRC-NEXT:    orq $128, %rcx
 ; SRC-NEXT:    orq %r10, %r9
 ; SRC-NEXT:    cmovneq %rdx, %rcx
-; SRC-NEXT:    xorl %edi, %edi
 ; SRC-NEXT:    jmp .LBB4_3
 ; SRC-NEXT:  .LBB4_1:
 ; SRC-NEXT:    movl $256, %ecx # imm = 0x100
@@ -944,7 +940,6 @@ define i256 @PR25498(i256 %a) nounwind {
 ; LIN-NEXT:    cmoveq %rsi, %rcx
 ; LIN-NEXT:    orq %r10, %r9
 ; LIN-NEXT:    cmoveq %rdx, %rcx
-; LIN-NEXT:    xorl %edi, %edi
 ; LIN-NEXT:    jmp .LBB4_3
 ; LIN-NEXT:  .LBB4_1:
 ; LIN-NEXT:    movl $256, %ecx # imm = 0x100
diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index d9ab2f7d1f5fb..b5e9e987188ea 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -245,11 +245,9 @@ define fastcc void @c_expand_expr_stmt(ptr %expr) nounwind {
 ; CHECK-NEXT:    jne .LBB3_9
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movzbl 0, %ebx
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne .LBB3_8
 ; CHECK-NEXT:  # %bb.2: # %bb.i
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je .LBB3_8
 ; CHECK-NEXT:  # %bb.3: # %lvalue_p.exit
@@ -519,7 +517,6 @@ define dso_local void @two() nounwind optsize {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je .LBB7_1
 ; CHECK-NEXT:  # %bb.2: # %return
@@ -560,7 +557,6 @@ define dso_local void @two_pgso() nounwind !prof !14 {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je .LBB8_1
 ; CHECK-NEXT:  # %bb.2: # %return
@@ -603,7 +599,6 @@ define dso_local void @two_minsize() nounwind minsize {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je .LBB9_1
 ; CHECK-NEXT:  # %bb.2: # %return
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index ee59169498d27..d4df6028b160f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -270,11 +270,7 @@ define i64 @PR55050() {
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    testb %al, %al
-; X86-NEXT:    jne .LBB15_2
-; X86-NEXT:  # %bb.1: # %if
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:  .LBB15_2: # %exit
-; X86-NEXT:    movl    %eax, %edx
+; X86-NEXT:    movl %eax, %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: PR55050:
diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index b02da217e76b2..ca3d54ac0899f 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -365,7 +365,6 @@ define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
 ; CHECK-NEXT:    jl .LBB3_3
 ; CHECK-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movl $1, %edx
 ; CHECK-NEXT:  .LBB3_2: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -387,7 +386,6 @@ define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
 ; CHECK-FORCEALL-NEXT:    jl .LBB3_3
 ; CHECK-FORCEALL-NEXT:  # %bb.1: # %for.body.preheader
 ; CHECK-FORCEALL-NEXT:    movl %edi, %ecx
-; CHECK-FORCEALL-NEXT:    xorl %eax, %eax
 ; CHECK-FORCEALL-NEXT:    movl $1, %edx
 ; CHECK-FORCEALL-NEXT:  .LBB3_2: # %for.body
 ; CHECK-FORCEALL-NEXT:    # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/X86/zext-extract_subreg.ll b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
index 877f11632b768..f31e099cf37b7 100644
--- a/llvm/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
@@ -8,7 +8,6 @@ define void @t() nounwind ssp {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne LBB0_6
 ; CHECK-NEXT:  ## %bb.1: ## %if.end.i
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    je LBB0_2
 ; CHECK-NEXT:  LBB0_6: ## %return
@@ -20,11 +19,9 @@ define void @t() nounwind ssp {
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne LBB0_5
 ; CHECK-NEXT:  ## %bb.3: ## %cond.true190
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:    jne LBB0_5
 ; CHECK-NEXT:  ## %bb.4: ## %cond.true225
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    testb %al, %al
 ; CHECK-NEXT:  LBB0_5: ## %cond.false205
 ; CHECK-NEXT:    ud2
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index 429bee4195fa9..495f3c50b589f 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -96,7 +96,6 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_4
 ; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    v_mov_b32_e32 v0, 1
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index 842fd8836da7e..0cb936cbf77d3 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -37,7 +37,6 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_4
 ; CHECK-NEXT:  ; %bb.3:
-; CHECK-NEXT:    v_mov_b32_e32 v0, 1
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected
index e74cc7c452034..e77de3d7bec7a 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected
@@ -89,7 +89,6 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    callq OUTLINED_FUNCTION_0
 ; CHECK-NEXT:  .LBB0_6:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    addq $20, %rsp
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected
index 96f3ac99e21bb..bcd78971b065f 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected
@@ -30,7 +30,6 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    callq OUTLINED_FUNCTION_0
 ; CHECK-NEXT:  .LBB0_6:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    addq $20, %rsp
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8

From 9a1de88e67ba34c8f6d1cbcece3ed3173a6ce9b2 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 13 May 2025 19:16:48 +0530
Subject: [PATCH 2/7] Remove the isPseudo check

---
 llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c3b6115338f05..43945bfb9e532 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -191,7 +191,7 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
           DefedReg = MO.getReg();
         else
           return false;
-      } else if (MI->isPseudo() && MI->isMoveImmediate()) {
+      } else if (MI->isMoveImmediate()) {
         return DefedReg.isValid();
       } else if (MO.getReg() && MO.getReg() != FrameReg)
         return false;

>From 2ab66a2a6342e26267fd21bd64ef9a18797cbdad Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 13 May 2025 23:56:00 +0530
Subject: [PATCH 3/7] Fix the test cases

---
 llvm/test/CodeGen/X86/isel-brcond-fcmp.ll | 24 -------
 llvm/test/CodeGen/X86/isel-brcond-icmp.ll | 83 -----------------------
 2 files changed, 107 deletions(-)

diff --git a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
index 808e4949e7726..cfafe6500f7f4 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
@@ -533,19 +533,10 @@ define i32 @fcmp_ogt1(float %x) {
 ; SDAG-X64:       ## %bb.0:
 ; SDAG-X64-NEXT:    xorl %eax, %eax
 ; SDAG-X64-NEXT:    testb %al, %al
-<<<<<<< HEAD
 ; SDAG-X64-NEXT:    jne LBB16_2
 ; SDAG-X64-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-X64-NEXT:    movl $1, %eax
 ; SDAG-X64-NEXT:  LBB16_2: ## %bb1
-=======
-; SDAG-X64-NEXT:    je LBB16_1
-; SDAG-X64-NEXT:  ## %bb.2: ## %bb1
-; SDAG-X64-NEXT:    xorl %eax, %eax
-; SDAG-X64-NEXT:    retq
-; SDAG-X64-NEXT:  LBB16_1: ## %bb2
-; SDAG-X64-NEXT:    movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-X64-NEXT:    retq
 ;
 ; FASTISEL-X64-LABEL: fcmp_ogt1:
@@ -565,8 +556,6 @@ define i32 @fcmp_ogt1(float %x) {
 ; GISEL-X64-NEXT:  LBB16_1: ## %bb2
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
-
-
   %1 = fcmp ogt float %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -680,19 +669,10 @@ define i32 @fcmp_olt1(float %x) {
 ; SDAG-X64:       ## %bb.0:
 ; SDAG-X64-NEXT:    xorl %eax, %eax
 ; SDAG-X64-NEXT:    testb %al, %al
-<<<<<<< HEAD
 ; SDAG-X64-NEXT:    jne LBB20_2
 ; SDAG-X64-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-X64-NEXT:    movl $1, %eax
 ; SDAG-X64-NEXT:  LBB20_2: ## %bb1
-=======
-; SDAG-X64-NEXT:    je LBB20_1
-; SDAG-X64-NEXT:  ## %bb.2: ## %bb1
-; SDAG-X64-NEXT:    xorl %eax, %eax
-; SDAG-X64-NEXT:    retq
-; SDAG-X64-NEXT:  LBB20_1: ## %bb2
-; SDAG-X64-NEXT:    movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-X64-NEXT:    retq
 ;
 ; FASTISEL-X64-LABEL: fcmp_olt1:
@@ -827,10 +807,6 @@ define i32 @fcmp_one1(float %x) {
 ; SDAG-X64-NEXT:    testb %al, %al
 ; SDAG-X64-NEXT:    je LBB24_1
 ; SDAG-X64-NEXT:  ## %bb.2: ## %bb1
-<<<<<<< HEAD
-=======
-; SDAG-X64-NEXT:    xorl %eax, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-X64-NEXT:    retq
 ; SDAG-X64-NEXT:  LBB24_1: ## %bb2
 ; SDAG-X64-NEXT:    movl $1, %eax
diff --git a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
index a9343909d43b5..80822f53ea702 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
@@ -31,8 +31,6 @@ define i32 @icmp_eq_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_eq_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -57,7 +55,6 @@ define i32 @icmp_eq_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_eq_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -104,8 +101,6 @@ define i32 @icmp_ne_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_ne_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -130,7 +125,6 @@ define i32 @icmp_ne_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_ne_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -177,8 +171,6 @@ define i32 @icmp_ugt_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_ugt_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -203,7 +195,6 @@ define i32 @icmp_ugt_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_ugt_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -250,8 +241,6 @@ define i32 @icmp_uge_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_uge_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -276,7 +265,6 @@ define i32 @icmp_uge_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_uge_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -323,8 +311,6 @@ define i32 @icmp_ult_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_ult_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -349,7 +335,6 @@ define i32 @icmp_ult_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_ult_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -396,8 +381,6 @@ define i32 @icmp_ule_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_ule_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -422,7 +405,6 @@ define i32 @icmp_ule_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_ule_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -469,8 +451,6 @@ define i32 @icmp_sgt_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_sgt_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -495,7 +475,6 @@ define i32 @icmp_sgt_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_sgt_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -542,8 +521,6 @@ define i32 @icmp_sge_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_sge_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -568,7 +545,6 @@ define i32 @icmp_sge_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_sge_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -615,8 +591,6 @@ define i32 @icmp_slt_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_slt_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -641,7 +615,6 @@ define i32 @icmp_slt_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_slt_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -688,8 +661,6 @@ define i32 @icmp_sle_2(i32 %x, i32 %y) {
 ; GISEL-X64-NEXT:    movl $1, %eax
 ; GISEL-X64-NEXT:    retq
 ;
-<<<<<<< HEAD
-=======
 ; SDAG-X86-LABEL: icmp_sle_2:
 ; SDAG-X86:       ## %bb.0:
 ; SDAG-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -714,7 +685,6 @@ define i32 @icmp_sle_2(i32 %x, i32 %y) {
 ; FASTISEL-X86-NEXT:    movl $1, %eax
 ; FASTISEL-X86-NEXT:    retl
 ;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; GISEL-X86-LABEL: icmp_sle_2:
 ; GISEL-X86:       ## %bb.0:
 ; GISEL-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -793,19 +763,10 @@ define i32 @icmp_ne(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-<<<<<<< HEAD
 ; SDAG-NEXT:    jne LBB11_2
 ; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
 ; SDAG-NEXT:  LBB11_2: ## %bb1
-=======
-; SDAG-NEXT:    je LBB11_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{[l|q]}}
-; SDAG-NEXT:  LBB11_1: ## %bb2
-; SDAG-NEXT:    movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_ne:
@@ -852,19 +813,10 @@ define i32 @icmp_ugt(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-<<<<<<< HEAD
 ; SDAG-NEXT:    jne LBB12_2
 ; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
 ; SDAG-NEXT:  LBB12_2: ## %bb1
-=======
-; SDAG-NEXT:    je LBB12_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{[l|q]}}
-; SDAG-NEXT:  LBB12_1: ## %bb2
-; SDAG-NEXT:    movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_ugt:
@@ -950,10 +902,6 @@ define i32 @icmp_uge(i32 %x) {
 ; GISEL-X86-NEXT:  LBB13_1: ## %bb2
 ; GISEL-X86-NEXT:    movl $1, %eax
 ; GISEL-X86-NEXT:    retl
-; FASTISEL-X64-LABEL: icmp_uge:
-; FASTISEL-X64:       ## %bb.0:
-; FASTISEL-X64-NEXT:    xorl %eax, %eax
-; FASTISEL-X64-NEXT:    retq
   %1 = icmp uge i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -967,19 +915,10 @@ define i32 @icmp_ult(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-<<<<<<< HEAD
 ; SDAG-NEXT:    jne LBB14_2
 ; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
 ; SDAG-NEXT:  LBB14_2: ## %bb1
-=======
-; SDAG-NEXT:    je LBB14_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{[l|q]}}
-; SDAG-NEXT:  LBB14_1: ## %bb2
-; SDAG-NEXT:    movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_ult:
@@ -1013,10 +952,6 @@ define i32 @icmp_ult(i32 %x) {
 ; GISEL-X86-NEXT:  LBB14_1: ## %bb2
 ; GISEL-X86-NEXT:    movl $1, %eax
 ; GISEL-X86-NEXT:    retl
-; FASTISEL-X64-LABEL: icmp_ult:
-; FASTISEL-X64:       ## %bb.0:
-; FASTISEL-X64-NEXT:    movl $1, %eax
-; FASTISEL-X64-NEXT:    ret{{q|l}}
   %1 = icmp ult i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -1082,19 +1017,10 @@ define i32 @icmp_sgt(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-<<<<<<< HEAD
 ; SDAG-NEXT:    jne LBB16_2
 ; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
 ; SDAG-NEXT:  LBB16_2: ## %bb1
-=======
-; SDAG-NEXT:    je LBB16_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{[l|q]}}
-; SDAG-NEXT:  LBB16_1: ## %bb2
-; SDAG-NEXT:    movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_sgt:
@@ -1193,19 +1119,10 @@ define i32 @icmp_slt(i32 %x) {
 ; SDAG:       ## %bb.0:
 ; SDAG-NEXT:    xorl %eax, %eax
 ; SDAG-NEXT:    testb %al, %al
-<<<<<<< HEAD
 ; SDAG-NEXT:    jne LBB18_2
 ; SDAG-NEXT:  ## %bb.1: ## %bb2
 ; SDAG-NEXT:    movl $1, %eax
 ; SDAG-NEXT:  LBB18_2: ## %bb1
-=======
-; SDAG-NEXT:    je LBB18_1
-; SDAG-NEXT:  ## %bb.2: ## %bb1
-; SDAG-NEXT:    xorl %eax, %eax
-; SDAG-NEXT:    ret{{[l|q]}}
-; SDAG-NEXT:  LBB18_1: ## %bb2
-; SDAG-NEXT:    movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
 ; SDAG-NEXT:    ret{{[l|q]}}
 ;
 ; FASTISEL-LABEL: icmp_slt:

>From 9ca319001e470049be4b2f6cd136f68fe94b05c6 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 15 May 2025 17:03:49 +0530
Subject: [PATCH 4/7] Update the logic for defs

---
 llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 19 +++++--
 llvm/test/CodeGen/AArch64/vector-lrint.ll     |  2 +
 ...tor-flatscratchinit-undefined-behavior2.ll |  1 -
 llvm/test/CodeGen/AMDGPU/call-waitcnt.ll      |  1 +
 .../AMDGPU/cgp-addressing-modes-gfx1030.ll    |  1 +
 .../AMDGPU/cgp-addressing-modes-gfx908.ll     |  1 +
 llvm/test/CodeGen/AMDGPU/div_v2i128.ll        | 12 +++++
 ...frame-setup-without-sgpr-to-vgpr-spills.ll |  2 +
 .../AMDGPU/gfx-callable-return-types.ll       | 28 +++++-----
 .../local-stack-alloc-block-sp-reference.ll   |  2 +
 .../materialize-frame-index-sgpr.gfx10.ll     | 51 -------------------
 .../AMDGPU/preserve-wwm-copy-dst-reg.ll       |  2 +
 .../AMDGPU/required-export-priority.ll        |  1 +
 llvm/test/CodeGen/AMDGPU/sibling-call.ll      | 15 +++---
 .../AMDGPU/spill-offset-calculation.ll        |  5 --
 .../CodeGen/AMDGPU/spill-scavenge-offset.ll   |  8 +++
 .../CodeGen/X86/scheduler-backtracking.ll     |  1 +
 ...dgpu_generated_funcs.ll.generated.expected |  1 +
 ...pu_generated_funcs.ll.nogenerated.expected |  1 +
 19 files changed, 73 insertions(+), 81 deletions(-)
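
For reference, a minimal standalone sketch of the operand rules this version of isCandidate() applies (plain C++, not LLVM code; the Operand struct, the IsSubReg() helper and the register numbers are made-up stand-ins for MachineOperand and TargetRegisterInfo::isSubRegister()):

// Sketch of the updated isCandidate() operand walk: one explicit def is
// tracked, a dead implicit def (e.g. flags) is ignored, and an implicit def
// of the tracked register's super-register is also ignored.
#include <cstdio>
#include <vector>

struct Operand {
  bool IsReg = false, IsDef = false, IsImplicit = false, IsDead = false;
  int Reg = 0; // 0 means "no register"
};

// Pretend register 100 is the super-register of register 1 (e.g. $rax/$eax).
static bool IsSubReg(int Super, int Sub) { return Super == 100 && Sub == 1; }

static bool isCandidateSketch(const std::vector<Operand> &Ops, int FrameReg,
                              int &DefedReg) {
  DefedReg = 0;
  for (size_t i = 0; i < Ops.size(); ++i) {
    const Operand &MO = Ops[i];
    if (!MO.IsReg)
      continue; // immediate-like operands are elided in this sketch
    if (MO.IsDef) {
      if (i == 0 && !MO.IsImplicit && !MO.IsDead)
        DefedReg = MO.Reg;                  // the one tracked def
      else if (i != 0 && DefedReg != 0 && MO.IsImplicit && MO.IsDead)
        continue;                           // dead implicit def is fine
      else if (i != 0 && DefedReg != 0 && MO.IsImplicit &&
               IsSubReg(MO.Reg, DefedReg))
        continue;                           // implicit def of the super-reg is fine
      else
        return false;                       // any other extra def disqualifies
    } else if (MO.Reg && MO.Reg != FrameReg) {
      return false;                         // only frame-register uses are allowed
    }
  }
  return DefedReg != 0;
}

int main() {
  // Roughly "$eax = MOV32r0 implicit-def dead $eflags, implicit-def $rax":
  // explicit def of 1, dead implicit def of 2, implicit def of super-reg 100.
  std::vector<Operand> Ops = {{true, true, false, false, 1},
                              {true, true, true, true, 2},
                              {true, true, true, false, 100}};
  int DefedReg = 0;
  bool OK = isCandidateSketch(Ops, /*FrameReg=*/7, DefedReg);
  std::printf("candidate: %d, DefedReg: %d\n", OK, DefedReg);
  return 0;
}

With this rule, a zeroing move whose only extra operands are a dead implicit flag def and an implicit def of the containing super-register is still treated as a cleanup candidate, which is what the test updates below reflect.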

diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 43945bfb9e532..7f61aec2987b9 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -186,13 +186,24 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg()) {
-      if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
+      if (MO.isDef()) {
         if (i == 0 && !MO.isImplicit() && !MO.isDead())
           DefedReg = MO.getReg();
-        else
+        else if (i != 0 && DefedReg != MCRegister::NoRegister) {
+          if (MO.isDead() && MO.isImplicit()) {
+            continue;
+          } else if (MO.isImplicit() &&
+                     MI->getParent()
+                         ->getParent()
+                         ->getSubtarget()
+                         .getRegisterInfo()
+                         ->isSubRegister(MO.getReg(), DefedReg)) {
+            continue;
+          } else {
+            return false;
+          }
+        } else
           return false;
-      } else if (MI->isMoveImmediate()) {
-        return DefedReg.isValid();
       } else if (MO.getReg() && MO.getReg() != FrameReg)
         return false;
     } else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 602643264e7be..3092f72908200 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -1335,3 +1335,5 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
   ret <32 x iXLen> %a
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-i32-GI: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
index 51caa84450ff3..9bb74a9d80264 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
@@ -246,7 +246,6 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
 ; GFX8-ARCH-FLAT-NEXT:    v_readlane_b32 s30, v3, 0
 ; GFX8-ARCH-FLAT-NEXT:    s_mov_b32 s32, s33
 ; GFX8-ARCH-FLAT-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX8-ARCH-FLAT-NEXT:    s_add_i32 s3, s33, 8
 ; GFX8-ARCH-FLAT-NEXT:    scratch_load_dword v3, off, s3 ; 4-byte Folded Reload
 ; GFX8-ARCH-FLAT-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX8-ARCH-FLAT-NEXT:    s_mov_b32 s33, s2
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 6889110752ae0..9abb50651146a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -41,6 +41,7 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    global_store_dword v0, v0, s[6:7]
 ; GCN-NEXT:    s_mov_b64 s[6:7], s[4:5]
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    s_mov_b32 s32, 0
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; GCN-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 98771dcb441cc..81f768f303ca1 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -31,6 +31,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
 ; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v1
 ; GCN-NEXT:    s_cbranch_execz .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %if
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 2
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index d212d7d52d841..49370e2fbf1b6 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -33,6 +33,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
 ; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GCN-NEXT:    s_cbranch_execz .LBB0_2
 ; GCN-NEXT:  ; %bb.1: ; %if
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
 ; GCN-NEXT:    v_mov_b32_e32 v1, 2.0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    global_atomic_add_f32 v0, v1, s[2:3] offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index a4ae669617263..77b78f1f8a333 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -323,6 +323,8 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v36, vcc, 64, v30
 ; SDAG-NEXT:    v_lshr_b64 v[37:38], v[6:7], v30
 ; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v29
+; SDAG-NEXT:    v_mov_b32_e32 v12, 0
+; SDAG-NEXT:    v_mov_b32_e32 v13, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v14, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v15, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -1105,6 +1107,8 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v28, vcc, 64, v22
 ; SDAG-NEXT:    v_lshr_b64 v[29:30], v[6:7], v22
 ; SDAG-NEXT:    v_add_i32_e32 v26, vcc, -1, v12
+; SDAG-NEXT:    v_mov_b32_e32 v20, 0
+; SDAG-NEXT:    v_mov_b32_e32 v21, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v10, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v11, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -1675,6 +1679,8 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v37, vcc, 64, v32
 ; SDAG-NEXT:    v_lshr_b64 v[24:25], v[0:1], v32
 ; SDAG-NEXT:    v_add_i32_e32 v36, vcc, -1, v31
+; SDAG-NEXT:    v_mov_b32_e32 v18, 0
+; SDAG-NEXT:    v_mov_b32_e32 v19, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v22, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v23, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -1868,6 +1874,8 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v51, vcc, 64, v38
 ; SDAG-NEXT:    v_lshr_b64 v[22:23], v[4:5], v38
 ; SDAG-NEXT:    v_add_i32_e32 v50, vcc, -1, v37
+; SDAG-NEXT:    v_mov_b32_e32 v18, 0
+; SDAG-NEXT:    v_mov_b32_e32 v19, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v20, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v21, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -2554,6 +2562,8 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v35, vcc, 64, v30
 ; SDAG-NEXT:    v_lshr_b64 v[26:27], v[2:3], v30
 ; SDAG-NEXT:    v_add_i32_e32 v34, vcc, -1, v8
+; SDAG-NEXT:    v_mov_b32_e32 v20, 0
+; SDAG-NEXT:    v_mov_b32_e32 v21, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v24, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v25, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
@@ -2727,6 +2737,8 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; SDAG-NEXT:    v_subrev_i32_e32 v39, vcc, 64, v34
 ; SDAG-NEXT:    v_lshr_b64 v[26:27], v[6:7], v34
 ; SDAG-NEXT:    v_add_i32_e32 v38, vcc, -1, v12
+; SDAG-NEXT:    v_mov_b32_e32 v22, 0
+; SDAG-NEXT:    v_mov_b32_e32 v23, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v24, 0
 ; SDAG-NEXT:    v_mov_b32_e32 v25, 0
 ; SDAG-NEXT:    s_mov_b64 s[10:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index a3c7eb8c56fb0..13884eb788d8a 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -53,6 +53,7 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    v_writelane_b32 v0, s31, 0
@@ -76,6 +77,7 @@ define void @callee_with_stack_and_call() #0 {
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    s_waitcnt vmcnt(0)
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 s[4:5], exec
 ; NO-SPILL-TO-VGPR-NEXT:    s_mov_b64 exec, 1
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:16
 ; NO-SPILL-TO-VGPR-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index e8aa64f0b8f15..6384fdba7a45a 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3234,20 +3234,20 @@ define amdgpu_gfx void @call_72xi32() #1 {
 ; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s1
 ; GFX11-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
 ; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX11-NEXT:    v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
-; GFX11-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
-; GFX11-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
-; GFX11-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 0
-; GFX11-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 0
-; GFX11-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 0
-; GFX11-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 0
-; GFX11-NEXT:    v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 0
-; GFX11-NEXT:    v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v22, 0
-; GFX11-NEXT:    v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v24, 0
-; GFX11-NEXT:    v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v26, 0
-; GFX11-NEXT:    v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v28, 0
-; GFX11-NEXT:    v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v30, 0
-; GFX11-NEXT:    v_mov_b32_e32 v31, 0
+; GFX11-NEXT:    v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
+; GFX11-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
+; GFX11-NEXT:    v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
+; GFX11-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
+; GFX11-NEXT:    v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
+; GFX11-NEXT:    v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
+; GFX11-NEXT:    v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
+; GFX11-NEXT:    v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
+; GFX11-NEXT:    v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
+; GFX11-NEXT:    v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
+; GFX11-NEXT:    v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
+; GFX11-NEXT:    v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
+; GFX11-NEXT:    v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
+; GFX11-NEXT:    v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
 ; GFX11-NEXT:    s_mov_b32 s1, return_72xi32@abs32@hi
 ; GFX11-NEXT:    s_mov_b32 s0, return_72xi32@abs32@lo
 ; GFX11-NEXT:    v_writelane_b32 v60, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 97b7e26d1230b..a3ebaec4811a9 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -225,10 +225,12 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
 ; MUBUF-NEXT:  ; %bb.2: ; %split
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x4000
 ; MUBUF-NEXT:    v_or_b32_e32 v0, 0x12d4, v1
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x4000
 ; MUBUF-NEXT:    s_movk_i32 s4, 0x4000
 ; MUBUF-NEXT:    buffer_load_dword v5, v0, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    v_or_b32_e32 v0, 0x12d0, v1
+; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x4000
 ; MUBUF-NEXT:    s_or_b32 s4, s4, 0x12c0
 ; MUBUF-NEXT:    buffer_load_dword v4, v0, s[0:3], 0 offen glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
index 4b5a7c207055a..198e98d30c8c9 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
@@ -32,7 +32,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
 ; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -61,7 +60,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
 ; GFX10_3-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
@@ -91,7 +89,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
 ; GFX11-NEXT:    scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -155,7 +152,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
 ; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -183,7 +179,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
 ; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
@@ -212,7 +207,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
 ; GFX942-NEXT:    ;;#ASMEND
 ; GFX942-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT:    s_add_i32 s2, s32, 0x4044
 ; GFX942-NEXT:    scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
 ; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
@@ -248,7 +242,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80880
 ; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -275,7 +268,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80880
 ; GFX10_3-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
@@ -302,7 +294,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    s_add_i32 s1, s32, 0x4044
 ; GFX11-NEXT:    scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -358,7 +349,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s6, s32, 0x101100
 ; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -384,7 +374,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s6, s32, 0x101100
 ; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
@@ -410,7 +399,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
 ; GFX942-NEXT:    ;;#ASMEND
 ; GFX942-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT:    s_add_i32 s2, s32, 0x4044
 ; GFX942-NEXT:    scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
 ; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
@@ -450,7 +438,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s6, s33, 0x80880
 ; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -484,7 +471,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s6, s33, 0x80880
 ; GFX10_3-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_mov_b32 s33, s5
@@ -518,7 +504,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    s_add_i32 s2, s33, 0x4044
 ; GFX11-NEXT:    scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_mov_b32 s33, s1
@@ -592,7 +577,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
 ; GFX8-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX8-NEXT:    s_mov_b32 s32, s33
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s7, s33, 0x101100
 ; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_mov_b32 s33, s6
@@ -625,7 +609,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
 ; GFX900-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX900-NEXT:    s_mov_b32 s32, s33
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s7, s33, 0x101100
 ; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_mov_b32 s33, s6
@@ -659,7 +642,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
 ; GFX942-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX942-NEXT:    s_mov_b32 s32, s33
 ; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT:    s_add_i32 s3, s33, 0x4044
 ; GFX942-NEXT:    scratch_load_dword v1, off, s3 ; 4-byte Folded Reload
 ; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_mov_b32 s33, s2
@@ -691,7 +673,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80800
 ; GFX10_1-NEXT:    buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -715,7 +696,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80800
 ; GFX10_3-NEXT:    buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
@@ -741,7 +721,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    s_add_i32 s1, s32, 0x4040
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -791,7 +770,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s6, s32, 0x101000
 ; GFX8-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -814,7 +792,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s6, s32, 0x101000
 ; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
@@ -838,7 +815,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
 ; GFX942-NEXT:    ;;#ASMEND
 ; GFX942-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT:    s_add_i32 s2, s32, 0x4040
 ; GFX942-NEXT:    scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
 ; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
@@ -865,7 +841,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x80800
 ; GFX10_1-NEXT:    buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -887,7 +862,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x80800
 ; GFX10_3-NEXT:    buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
@@ -909,7 +883,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    s_add_i32 s1, s32, 0x4040
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -955,7 +928,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s6, s32, 0x101000
 ; GFX8-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -976,7 +948,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s6, s32, 0x101000
 ; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
@@ -997,7 +968,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
 ; GFX942-NEXT:    ;;#ASMEND
 ; GFX942-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT:    s_add_i32 s2, s32, 0x4040
 ; GFX942-NEXT:    scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
 ; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
@@ -1030,7 +1000,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s6, s33, 0x80800
 ; GFX10_1-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -1059,7 +1028,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s6, s33, 0x80800
 ; GFX10_3-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_mov_b32 s33, s5
@@ -1089,7 +1057,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    s_add_i32 s2, s33, 0x4040
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_mov_b32 s33, s1
@@ -1149,7 +1116,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
 ; GFX8-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, s33
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s7, s33, 0x101000
 ; GFX8-NEXT:    buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_mov_b32 s33, s6
@@ -1177,7 +1143,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
 ; GFX900-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX900-NEXT:    s_mov_b32 s32, s33
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s7, s33, 0x101000
 ; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_mov_b32 s33, s6
@@ -1206,7 +1171,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
 ; GFX942-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX942-NEXT:    s_mov_b32 s32, s33
 ; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT:    s_add_i32 s3, s33, 0x4040
 ; GFX942-NEXT:    scratch_load_dword v0, off, s3 ; 4-byte Folded Reload
 ; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_mov_b32 s33, s2
@@ -1238,7 +1202,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s5, -1
-; GFX10_1-NEXT:    s_add_i32 s6, s33, 0x80800
 ; GFX10_1-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s5
@@ -1265,7 +1228,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s5, -1
-; GFX10_3-NEXT:    s_add_i32 s6, s33, 0x80800
 ; GFX10_3-NEXT:    buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s5
 ; GFX10_3-NEXT:    s_mov_b32 s33, s4
@@ -1291,7 +1253,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s1, -1
-; GFX11-NEXT:    s_add_i32 s2, s33, 0x4040
 ; GFX11-NEXT:    scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s1
 ; GFX11-NEXT:    s_mov_b32 s33, s0
@@ -1347,7 +1308,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
 ; GFX8-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX8-NEXT:    s_mov_b32 s32, s33
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[6:7], -1
-; GFX8-NEXT:    s_add_i32 s5, s33, 0x101000
 ; GFX8-NEXT:    buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[6:7]
 ; GFX8-NEXT:    s_mov_b32 s33, s4
@@ -1373,7 +1333,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
 ; GFX900-NEXT:    v_readlane_b32 s55, v0, 0
 ; GFX900-NEXT:    s_mov_b32 s32, s33
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[6:7], -1
-; GFX900-NEXT:    s_add_i32 s5, s33, 0x101000
 ; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[6:7]
 ; GFX900-NEXT:    s_mov_b32 s33, s4
@@ -1433,7 +1392,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x100800
 ; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -1461,7 +1419,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x100800
 ; GFX10_3-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
@@ -1487,7 +1444,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT:    s_add_i32 s1, s32, 0x8040
 ; GFX11-NEXT:    scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
 ; GFX11-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
@@ -1543,7 +1499,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s6, s32, 0x201000
 ; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -1570,7 +1525,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s6, s32, 0x201000
 ; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
@@ -1596,7 +1550,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
 ; GFX942-NEXT:    ;;#ASMEND
 ; GFX942-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX942-NEXT:    s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT:    s_add_i32 s2, s32, 0x8040
 ; GFX942-NEXT:    scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
 ; GFX942-NEXT:    s_mov_b64 exec, s[0:1]
 ; GFX942-NEXT:    s_waitcnt vmcnt(0)
@@ -1634,7 +1587,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
 ; GFX10_1-NEXT:    ;;#ASMEND
 ; GFX10_1-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_1-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT:    s_add_i32 s5, s32, 0x100800
 ; GFX10_1-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_1-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10_1-NEXT:    s_mov_b32 exec_lo, s4
@@ -1664,7 +1616,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
 ; GFX10_3-NEXT:    ;;#ASMEND
 ; GFX10_3-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX10_3-NEXT:    s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT:    s_add_i32 s5, s32, 0x100800
 ; GFX10_3-NEXT:    buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
 ; GFX10_3-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX10_3-NEXT:    s_waitcnt vmcnt(0)
@@ -1754,7 +1705,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX8-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT:    s_add_i32 s6, s32, 0x201000
 ; GFX8-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX8-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
@@ -1783,7 +1733,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
 ; GFX900-NEXT:    ;;#ASMEND
 ; GFX900-NEXT:    v_readlane_b32 s55, v1, 0
 ; GFX900-NEXT:    s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT:    s_add_i32 s6, s32, 0x201000
 ; GFX900-NEXT:    buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
 ; GFX900-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX900-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 3d6e7c532348f..2d95ec6f699dc 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -395,6 +395,7 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX908-NEXT:    buffer_load_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[16:17]
+; GFX908-NEXT:    s_mov_b64 s[16:17], exec
 ; GFX908-NEXT:    s_mov_b64 exec, 1
 ; GFX908-NEXT:    buffer_store_dword v2, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    v_writelane_b32 v2, s31, 0
@@ -742,6 +743,7 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
 ; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    s_waitcnt vmcnt(0)
 ; GFX908-NEXT:    s_mov_b64 exec, s[4:5]
+; GFX908-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX908-NEXT:    s_mov_b64 exec, 1
 ; GFX908-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:168
 ; GFX908-NEXT:    buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index e145380dca59e..4aa1ddee2efe3 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -267,6 +267,7 @@ define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
 ; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
 ; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 16, v2, vcc_lo
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    scratch_store_b32 v0, v1, off
 ; GCN-NEXT:    scratch_load_b32 v0, off, off
 ; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index ddb7d6b9c3936..35234236b848f 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -388,7 +388,11 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
 ; GCN-NEXT:    s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12
 ; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_writelane_b32 v40, s30, 0
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
@@ -419,9 +423,6 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
 ; GCN-NEXT:    v_mov_b32_e32 v29, 0
 ; GCN-NEXT:    v_mov_b32_e32 v30, 0
 ; GCN-NEXT:    v_writelane_b32 v40, s31, 1
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; GCN-NEXT:    v_readlane_b32 s31, v40, 1
@@ -527,6 +528,10 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
 ; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:48
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
+; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0
@@ -555,9 +560,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
 ; GCN-NEXT:    v_mov_b32_e32 v28, 0
 ; GCN-NEXT:    v_mov_b32_e32 v29, 0
 ; GCN-NEXT:    v_mov_b32_e32 v30, 0
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_setpc_b64 s[4:5]
 entry:
@@ -926,6 +928,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
 ; GCN-NEXT:    s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
 ; GCN-NEXT:    s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
 ; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    v_mov_b32_e32 v2, 0
 ; GCN-NEXT:    v_mov_b32_e32 v3, 0
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index 0452c3b89e9a9..66e2e3d41d95a 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -125,7 +125,6 @@ define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
 ; MUBUF-NEXT:    ;;#ASMEND
 ; MUBUF-NEXT:    ;;#ASMSTART
 ; MUBUF-NEXT:    ;;#ASMEND
-; MUBUF-NEXT:    s_add_i32 s10, s32, 0x40100
 ; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s10 ; 4-byte Folded Reload
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    ;;#ASMSTART
@@ -147,7 +146,6 @@ define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
 ; FLATSCR-NEXT:    ;;#ASMEND
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ;;#ASMEND
-; FLATSCR-NEXT:    s_add_i32 s8, s32, 0x1004
 ; FLATSCR-NEXT:    scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    ;;#ASMSTART
@@ -466,7 +464,6 @@ define void @test_sgpr_offset_function() {
 ; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
 ; MUBUF-NEXT:    ;;#ASMSTART
 ; MUBUF-NEXT:    ;;#ASMEND
-; MUBUF-NEXT:    s_add_i32 s4, s32, 0x40100
 ; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
@@ -482,7 +479,6 @@ define void @test_sgpr_offset_function() {
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
 ; FLATSCR-NEXT:    ;;#ASMSTART
 ; FLATSCR-NEXT:    ;;#ASMEND
-; FLATSCR-NEXT:    s_add_i32 s0, s32, 0x1004
 ; FLATSCR-NEXT:    scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:8
@@ -586,7 +582,6 @@ define void @test_inst_offset_subregs_function() {
 ; MUBUF-NEXT:    ;;#ASMEND
 ; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
-; MUBUF-NEXT:    s_add_i32 s4, s32, 0x3ff00
 ; MUBUF-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
 ; MUBUF-NEXT:    buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index e9aebeef6ea6d..50056b62b3397 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -9971,6 +9971,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s34, 0x80c00
@@ -9988,6 +9989,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10005,6 +10007,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s34, 0x81400
@@ -10022,6 +10025,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10039,6 +10043,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 0xff
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s34, 0x81c00
@@ -10056,6 +10061,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
+; GFX6-NEXT:    s_mov_b64 s[6:7], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt expcnt(0)
@@ -10099,6 +10105,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    buffer_load_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[34:35]
+; GFX6-NEXT:    s_mov_b64 s[34:35], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s44, 0x82c00
@@ -10158,6 +10165,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    s_mov_b64 exec, s[4:5]
 ; GFX6-NEXT:    s_mov_b64 s[36:37], s[0:1]
+; GFX6-NEXT:    s_mov_b64 s[4:5], exec
 ; GFX6-NEXT:    s_mov_b64 exec, 15
 ; GFX6-NEXT:    buffer_store_dword v4, off, s[40:43], 0
 ; GFX6-NEXT:    s_mov_b32 s6, 0x80800
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index cfe1e099e0e0c..3ecf4eabdf020 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -940,6 +940,7 @@ define i256 @PR25498(i256 %a) nounwind {
 ; LIN-NEXT:    cmoveq %rsi, %rcx
 ; LIN-NEXT:    orq %r10, %r9
 ; LIN-NEXT:    cmoveq %rdx, %rcx
+; LIN-NEXT:    xorl %edi, %edi
 ; LIN-NEXT:    jmp .LBB4_3
 ; LIN-NEXT:  .LBB4_1:
 ; LIN-NEXT:    movl $256, %ecx # imm = 0x100
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index 495f3c50b589f..429bee4195fa9 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -96,6 +96,7 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_4
 ; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_mov_b32_e32 v0, 1
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index 0cb936cbf77d3..842fd8836da7e 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -37,6 +37,7 @@ define dso_local i32 @check_boundaries() #0 {
 ; CHECK-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
 ; CHECK-NEXT:    s_cbranch_execz .LBB0_4
 ; CHECK-NEXT:  ; %bb.3:
+; CHECK-NEXT:    v_mov_b32_e32 v0, 1
 ; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:12
 ; CHECK-NEXT:  .LBB0_4:
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]

>From 33373fa7327c375e446e81268782f0e2377e8eab Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 15 May 2025 19:10:09 +0530
Subject: [PATCH 5/7] Restructure the if-else ladder

---
 llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 7f61aec2987b9..0b9f984408169 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -190,18 +190,15 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
         if (i == 0 && !MO.isImplicit() && !MO.isDead())
           DefedReg = MO.getReg();
         else if (i != 0 && DefedReg != MCRegister::NoRegister) {
-          if (MO.isDead() && MO.isImplicit()) {
+          if (MO.isDead() && MO.isImplicit())
             continue;
-          } else if (MO.isImplicit() &&
-                     MI->getParent()
-                         ->getParent()
-                         ->getSubtarget()
-                         .getRegisterInfo()
-                         ->isSubRegister(MO.getReg(), DefedReg)) {
+          if (MO.isImplicit() && MI->getParent()
+                                     ->getParent()
+                                     ->getSubtarget()
+                                     .getRegisterInfo()
+                                     ->isSubRegister(MO.getReg(), DefedReg))
             continue;
-          } else {
-            return false;
-          }
+          return false;
         } else
           return false;
       } else if (MO.getReg() && MO.getReg() != FrameReg)
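
For reference, after this restructuring the def-operand handling reads roughly
as below. This is a paraphrased sketch assembled from the hunk above, not a
verbatim copy of the file:

  if (MO.isDef()) {
    if (i == 0 && !MO.isImplicit() && !MO.isDead())
      DefedReg = MO.getReg();
    else if (i != 0 && DefedReg != MCRegister::NoRegister) {
      if (MO.isDead() && MO.isImplicit())
        continue; // A dead implicit def does not disqualify MI.
      if (MO.isImplicit() &&
          MI->getParent()->getParent()->getSubtarget().getRegisterInfo()
              ->isSubRegister(MO.getReg(), DefedReg))
        continue; // DefedReg is a sub-register of this implicit def.
      return false; // Any other extra def disqualifies MI.
    } else
      return false;
  } else if (MO.getReg() && MO.getReg() != FrameReg)
    return false;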

>From a709a64c927352f08badc715d3743df5f49cffac Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Mon, 19 May 2025 12:04:20 +0530
Subject: [PATCH 6/7] Clean up test case and pass TRI as an argument to the
 isCandidate function

---
 llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 10 +++-------
 llvm/test/CodeGen/AArch64/vector-lrint.ll     |  2 --
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 0b9f984408169..4bb7bdffad218 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -178,7 +178,7 @@ void MachineLateInstrsCleanup::removeRedundantDef(MachineInstr *MI) {
 // and the only reg it may use is FrameReg. Typically this is an immediate
 // load or a load-address instruction.
 static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
-                        Register FrameReg) {
+                        Register FrameReg, const TargetRegisterInfo *TRI) {
   DefedReg = MCRegister::NoRegister;
   bool SawStore = true;
   if (!MI->isSafeToMove(SawStore) || MI->isImplicitDef() || MI->isInlineAsm())
@@ -192,11 +192,7 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
         else if (i != 0 && DefedReg != MCRegister::NoRegister) {
           if (MO.isDead() && MO.isImplicit())
             continue;
-          if (MO.isImplicit() && MI->getParent()
-                                     ->getParent()
-                                     ->getSubtarget()
-                                     .getRegisterInfo()
-                                     ->isSubRegister(MO.getReg(), DefedReg))
+          if (MO.isImplicit() && TRI->regsOverlap(MO.getReg(), DefedReg))
             continue;
           return false;
         } else
@@ -245,7 +241,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
     }
 
     Register DefedReg;
-    bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
+    bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg, TRI);
 
     // Check for an earlier identical and reusable instruction.
     if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) {
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 3092f72908200..602643264e7be 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -1335,5 +1335,3 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
   ret <32 x iXLen> %a
 }
 declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-i32-GI: {{.*}}
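
With this change, isCandidate takes the TargetRegisterInfo from its caller
instead of walking MI->getParent()->getParent()->getSubtarget() on every
operand, and the implicit-def check becomes a single regsOverlap call. A
condensed sketch of the changed pieces, paraphrased from the hunks above:

  static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
                          Register FrameReg, const TargetRegisterInfo *TRI);

  // Inside the def-operand handling:
  if (MO.isImplicit() && TRI->regsOverlap(MO.getReg(), DefedReg))
    continue;

  // Call site in processBlock():
  bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg, TRI);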

>From f7792d2d0cbea0475ff468e280cb832315c2c6bc Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Mon, 19 May 2025 17:40:21 +0530
Subject: [PATCH 7/7] Update the comments and the isSubRegister call

---
 llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 4bb7bdffad218..afa9d87507738 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -187,12 +187,23 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
     const MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg()) {
       if (MO.isDef()) {
+        // To set DefedReg, the first MachineOperand must be an explicit,
+        // non-dead def.
+        // For example:
+        // renamable $r9d = MOV32r0 implicit-def dead $eflags, implicit-def $r9
+        // The first operand, $r9d, is an explicit def and not dead, so it is
+        // valid and can be used as DefedReg.
         if (i == 0 && !MO.isImplicit() && !MO.isDead())
           DefedReg = MO.getReg();
-        else if (i != 0 && DefedReg != MCRegister::NoRegister) {
+        // If DefedReg already holds a valid register, check the remaining operands.
+        else if (DefedReg != MCRegister::NoRegister) {
+          // If the MachineOperand is both dead and implicit, skip it and
+          // continue to the next operand.
           if (MO.isDead() && MO.isImplicit())
             continue;
-          if (MO.isImplicit() && TRI->regsOverlap(MO.getReg(), DefedReg))
+          // If the MachineOperand is an implicit def and DefedReg is a
+          // sub-register of it, continue to the next operand.
+          if (MO.isImplicit() && TRI->isSubRegister(MO.getReg(), DefedReg))
             continue;
           return false;
         } else
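
Read together with the code, the new comment's example is classified as
follows (this is a reading of the check above, not additional pass output):

  // renamable $r9d = MOV32r0 implicit-def dead $eflags, implicit-def $r9
  //
  // operand 0: $r9d    explicit def, not dead           -> DefedReg = $r9d
  // operand 1: $eflags implicit def, dead               -> skipped
  // operand 2: $r9     implicit def, $r9d sub-reg of it -> skipped
  //
  // No operand reaches the "return false" path, so MOV32r0 remains a cleanup
  // candidate and a later identical instruction defining $r9d can be removed.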


