[llvm] [Codegen] Remove redundant instruction using machinelateCleanup (PR #139716)
Rohit Aggarwal via llvm-commits
llvm-commits at lists.llvm.org
Mon May 19 06:53:54 PDT 2025
https://github.com/rohitaggarwal007 updated https://github.com/llvm/llvm-project/pull/139716
From 214caf495e9198ba853da5a89dfafda1576808a2 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 13 May 2025 17:20:55 +0530
Subject: [PATCH 1/7] Remove redundant pseudo mov instruction
---
llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 4 +-
.../implicit-def-subreg-to-reg-regression.ll | 1 -
llvm/test/CodeGen/AMDGPU/call-waitcnt.ll | 1 -
.../CodeGen/AMDGPU/captured-frame-index.ll | 3 +-
.../AMDGPU/cgp-addressing-modes-gfx1030.ll | 1 -
.../AMDGPU/cgp-addressing-modes-gfx908.ll | 1 -
.../CodeGen/AMDGPU/cgp-addressing-modes.ll | 3 +-
llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 12 -
...frame-setup-without-sgpr-to-vgpr-spills.ll | 2 -
.../AMDGPU/gfx-callable-return-types.ll | 28 +--
.../local-stack-alloc-block-sp-reference.ll | 2 -
.../AMDGPU/preserve-wwm-copy-dst-reg.ll | 2 -
.../AMDGPU/required-export-priority.ll | 1 -
llvm/test/CodeGen/AMDGPU/sibling-call.ll | 15 +-
.../CodeGen/AMDGPU/spill-scavenge-offset.ll | 8 -
.../CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 1 -
.../CodeGen/X86/AMX/amx-ldtilecfg-insert.ll | 2 -
llvm/test/CodeGen/X86/avx-load-store.ll | 1 -
llvm/test/CodeGen/X86/avx512-i1test.ll | 14 +-
llvm/test/CodeGen/X86/isel-brcond-fcmp.ll | 106 +++++++-
llvm/test/CodeGen/X86/isel-brcond-icmp.ll | 232 +++++-------------
llvm/test/CodeGen/X86/pr36602.ll | 1 -
llvm/test/CodeGen/X86/pr38795.ll | 1 -
llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll | 1 -
.../CodeGen/X86/scheduler-backtracking.ll | 5 -
llvm/test/CodeGen/X86/tail-opts.ll | 5 -
.../vector-shuffle-combining-avx512bwvl.ll | 6 +-
llvm/test/CodeGen/X86/x86-cmov-converter.ll | 2 -
llvm/test/CodeGen/X86/zext-extract_subreg.ll | 3 -
...dgpu_generated_funcs.ll.generated.expected | 1 -
...pu_generated_funcs.ll.nogenerated.expected | 1 -
.../x86_generated_funcs.ll.generated.expected | 1 -
...86_generated_funcs.ll.nogenerated.expected | 1 -
33 files changed, 183 insertions(+), 285 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c8c8ed99d93ea..c3b6115338f05 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -186,11 +186,13 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
- if (MO.isDef()) {
+ if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
else
return false;
+ } else if (MI->isPseudo() && MI->isMoveImmediate()) {
+ return DefedReg.isValid();
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
} else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
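For reference, the intended effect of this hunk: a move-immediate may now carry
register operands beyond its single explicit def (for example an implicit, dead
flag def such as EFLAGS on an X86 zeroing idiom) and still be treated as a
removable definition, provided the explicit def in operand 0 has already been
recorded. A simplified sketch of the operand scan after this change follows; it
is illustrative only and omits the up-front side-effect screening that the real
isCandidate() performs:

// Sketch only: mirrors the operand scan of isCandidate() after this patch,
// using the pass's existing types (MachineInstr, MachineOperand, Register).
static bool isCandidateSketch(const MachineInstr *MI, Register &DefedReg,
                              Register FrameReg) {
  DefedReg = MCRegister::NoRegister;
  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI->getOperand(I);
    if (!MO.isReg())
      continue; // the real pass also screens non-register operand kinds here
    if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
      // Only an explicit, live def in operand 0 may become the tracked def.
      if (I == 0 && !MO.isImplicit() && !MO.isDead())
        DefedReg = MO.getReg();
      else
        return false;
    } else if (MI->isPseudo() && MI->isMoveImmediate()) {
      // New behaviour: further operands on a pseudo move-immediate no longer
      // disqualify it; it is a candidate iff a def was already recorded.
      return DefedReg.isValid();
    } else if (MO.getReg() && MO.getReg() != FrameReg) {
      return false;
    }
  }
  return DefedReg.isValid();
}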
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed9052..08346f1a857eb 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -80,7 +80,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: stp xzr, xzr, [sp]
; CHECK-NEXT: stp x8, xzr, [sp, #16]
; CHECK-NEXT: bl _fprintf
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 9abb50651146a..6889110752ae0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -41,7 +41,6 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 963b3a55259fa..852ffd5fa7183 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -113,8 +113,7 @@ define amdgpu_kernel void @stored_fi_to_fi() #0 {
; GCN-LABEL: {{^}}stored_fi_to_global:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[FI]]
+; GCN: buffer_store_dword v{{[0-9]+}}
define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
%tmp = alloca float, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 81f768f303ca1..98771dcb441cc 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -31,7 +31,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
; GCN-NEXT: v_cmpx_ne_u32_e32 0, v1
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index 49370e2fbf1b6..d212d7d52d841 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -33,7 +33,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_add_f32 v0, v1, s[2:3] offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 0f9407b77aa83..8f92ee42c066a 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -72,8 +72,7 @@ done:
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
-; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+; GFX9: global_load_sbyte {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; GCN: {{^}}.LBB2_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..a4ae669617263 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -323,8 +323,6 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
-; SDAG-NEXT: v_mov_b32_e32 v12, 0
-; SDAG-NEXT: v_mov_b32_e32 v13, 0
; SDAG-NEXT: v_mov_b32_e32 v14, 0
; SDAG-NEXT: v_mov_b32_e32 v15, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1107,8 +1105,6 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22
; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22
; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v10, 0
; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1679,8 +1675,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1874,8 +1868,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38
; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38
; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2562,8 +2554,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2737,8 +2727,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34
; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34
; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v22, 0
-; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 13884eb788d8a..a3c7eb8c56fb0 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -53,7 +53,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0
@@ -77,7 +76,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 6384fdba7a45a..e8aa64f0b8f15 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3234,20 +3234,20 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
-; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
-; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
-; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
-; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
-; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
-; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
-; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
-; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
-; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
-; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
-; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
-; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
-; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
+; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
+; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
+; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 0
+; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 0
+; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 0
+; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 0
+; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 0
+; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v22, 0
+; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v24, 0
+; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v26, 0
+; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v28, 0
+; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_mov_b32_e32 v31, 0
; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo
; GFX11-NEXT: v_writelane_b32 v60, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index a3ebaec4811a9..97b7e26d1230b 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -225,12 +225,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; MUBUF-NEXT: ; %bb.2: ; %split
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_movk_i32 s4, 0x4000
; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0
; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 2d95ec6f699dc..3d6e7c532348f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -395,7 +395,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
-; GFX908-NEXT: s_mov_b64 s[16:17], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
@@ -743,7 +742,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
-; GFX908-NEXT: s_mov_b64 s[4:5], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index 4aa1ddee2efe3..e145380dca59e 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -267,7 +267,6 @@ define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GCN-NEXT: v_cndmask_b32_e32 v0, 16, v2, vcc_lo
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: scratch_store_b32 v0, v1, off
; GCN-NEXT: scratch_load_b32 v0, off, off
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 35234236b848f..ddb7d6b9c3936 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -388,11 +388,7 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_writelane_b32 v40, s30, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
@@ -423,6 +419,9 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
@@ -528,10 +527,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_mov_b32_e32 v5, 0
@@ -560,6 +555,9 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: v_mov_b32_e32 v28, 0
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
entry:
@@ -928,7 +926,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed@rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 50056b62b3397..e9aebeef6ea6d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -9971,7 +9971,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x80c00
@@ -9989,7 +9988,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10007,7 +10005,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81400
@@ -10025,7 +10022,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10043,7 +10039,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81c00
@@ -10061,7 +10056,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10105,7 +10099,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[34:35]
-; GFX6-NEXT: s_mov_b64 s[34:35], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s44, 0x82c00
@@ -10165,7 +10158,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[4:5]
; GFX6-NEXT: s_mov_b64 s[36:37], s[0:1]
-; GFX6-NEXT: s_mov_b64 s[4:5], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s6, 0x80800
diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
index 8d690ba06e3bd..ac88ed8d80ff8 100644
--- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -35,7 +35,6 @@ define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, pt
; CHECK-NEXT: andl $1, %ebp
; CHECK-NEXT: xorpd %xmm0, %xmm0
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_7: # %bb.i28.i
diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
index 06cf968512db8..2f6e2eae85c25 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
@@ -174,7 +174,6 @@ define dso_local void @test4(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: incl %edi
; CHECK-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_4
; CHECK-NEXT: .LBB3_2: # %amx2
@@ -190,7 +189,6 @@ define dso_local void @test4(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: decl %edi
; CHECK-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: ldtilecfg -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_2
; CHECK-NEXT: .LBB3_4: # %amx1
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 3f856d33145d8..ef7edf700ca9e 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -216,7 +216,6 @@ define void @f_f() nounwind {
; CHECK-NEXT: jne .LBB9_2
; CHECK-NEXT: # %bb.1: # %cif_mask_all
; CHECK-NEXT: .LBB9_2: # %cif_mask_mixed
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB9_4
; CHECK-NEXT: # %bb.3: # %cif_mixed_test_all
diff --git a/llvm/test/CodeGen/X86/avx512-i1test.ll b/llvm/test/CodeGen/X86/avx512-i1test.ll
index d43f05bbd5a1d..c5d4c87d66da2 100644
--- a/llvm/test/CodeGen/X86/avx512-i1test.ll
+++ b/llvm/test/CodeGen/X86/avx512-i1test.ll
@@ -14,13 +14,13 @@ define void @func() {
; CHECK-NEXT: retq
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb33
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.2: # %bb35
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %bb35
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: jmp .LBB0_1
bb1:
br i1 poison, label %L_10, label %L_10
diff --git a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
index 5a28e094f8a3c..4653c496ca051 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
@@ -531,21 +531,19 @@ bb1:
define i32 @fcmp_ogt1(float %x) {
; SDAG-X64-LABEL: fcmp_ogt1:
; SDAG-X64: ## %bb.0:
-; SDAG-X64-NEXT: xorl %eax, %eax
-; SDAG-X64-NEXT: testb %al, %al
-; SDAG-X64-NEXT: je LBB16_1
-; SDAG-X64-NEXT: ## %bb.2: ## %bb1
-; SDAG-X64-NEXT: xorl %eax, %eax
-; SDAG-X64-NEXT: retq
-; SDAG-X64-NEXT: LBB16_1: ## %bb2
-; SDAG-X64-NEXT: movl $1, %eax
+; SDAG-X64-NEXT: xorl %eax, %eax
+; SDAG-X64-NEXT: testb %al, %al
+; SDAG-X64-NEXT: jne LBB16_2
+; SDAG-X64-NEXT: ## %bb.1: ## %bb2
+; SDAG-X64-NEXT: movl $1, %eax
+; SDAG-X64-NEXT: LBB16_2: ## %bb1
; SDAG-X64-NEXT: retq
-
+;
; FASTISEL-X64-LABEL: fcmp_ogt1:
; FASTISEL-X64: ## %bb.0:
-; FASTISEL-X64: movl $1, %eax
-; FASTISEL-X64: retq
-
+; FASTISEL-X64-NEXT: movl $1, %eax
+; FASTISEL-X64-NEXT: retq
+;
; GISEL-X64-LABEL: fcmp_ogt1:
; GISEL-X64: ## %bb.0:
; GISEL-X64-NEXT: ucomiss %xmm0, %xmm0
@@ -558,6 +556,8 @@ define i32 @fcmp_ogt1(float %x) {
; GISEL-X64-NEXT: LBB16_1: ## %bb2
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
+
+
%1 = fcmp ogt float %x, %x
br i1 %1, label %bb1, label %bb2
bb2:
@@ -667,6 +667,21 @@ bb1:
}
define i32 @fcmp_olt1(float %x) {
+; SDAG-X64-LABEL: fcmp_olt1:
+; SDAG-X64: ## %bb.0:
+; SDAG-X64-NEXT: xorl %eax, %eax
+; SDAG-X64-NEXT: testb %al, %al
+; SDAG-X64-NEXT: jne LBB20_2
+; SDAG-X64-NEXT: ## %bb.1: ## %bb2
+; SDAG-X64-NEXT: movl $1, %eax
+; SDAG-X64-NEXT: LBB20_2: ## %bb1
+; SDAG-X64-NEXT: retq
+;
+; FASTISEL-X64-LABEL: fcmp_olt1:
+; FASTISEL-X64: ## %bb.0:
+; FASTISEL-X64-NEXT: movl $1, %eax
+; FASTISEL-X64-NEXT: retq
+;
; GISEL-X64-LABEL: fcmp_olt1:
; GISEL-X64: ## %bb.0:
; GISEL-X64-NEXT: ucomiss %xmm0, %xmm0
@@ -788,6 +803,22 @@ bb1:
}
define i32 @fcmp_one1(float %x) {
+; SDAG-X64-LABEL: fcmp_one1:
+; SDAG-X64: ## %bb.0:
+; SDAG-X64-NEXT: xorl %eax, %eax
+; SDAG-X64-NEXT: testb %al, %al
+; SDAG-X64-NEXT: je LBB24_1
+; SDAG-X64-NEXT: ## %bb.2: ## %bb1
+; SDAG-X64-NEXT: retq
+; SDAG-X64-NEXT: LBB24_1: ## %bb2
+; SDAG-X64-NEXT: movl $1, %eax
+; SDAG-X64-NEXT: retq
+;
+; FASTISEL-X64-LABEL: fcmp_one1:
+; FASTISEL-X64: ## %bb.0:
+; FASTISEL-X64-NEXT: movl $1, %eax
+; FASTISEL-X64-NEXT: retq
+;
; GISEL-X64-LABEL: fcmp_one1:
; GISEL-X64: ## %bb.0:
; GISEL-X64-NEXT: ucomiss %xmm0, %xmm0
@@ -973,6 +1004,23 @@ bb1:
}
define i32 @fcmp_ueq1(float %x) {
+; SDAG-X64-LABEL: fcmp_ueq1:
+; SDAG-X64: ## %bb.0:
+; SDAG-X64-NEXT: movb $1, %al
+; SDAG-X64-NEXT: testb %al, %al
+; SDAG-X64-NEXT: jne LBB30_2
+; SDAG-X64-NEXT: ## %bb.1: ## %bb2
+; SDAG-X64-NEXT: movl $1, %eax
+; SDAG-X64-NEXT: retq
+; SDAG-X64-NEXT: LBB30_2: ## %bb1
+; SDAG-X64-NEXT: xorl %eax, %eax
+; SDAG-X64-NEXT: retq
+;
+; FASTISEL-X64-LABEL: fcmp_ueq1:
+; FASTISEL-X64: ## %bb.0:
+; FASTISEL-X64-NEXT: xorl %eax, %eax
+; FASTISEL-X64-NEXT: retq
+;
; GISEL-X64-LABEL: fcmp_ueq1:
; GISEL-X64: ## %bb.0:
; GISEL-X64-NEXT: ucomiss %xmm0, %xmm0
@@ -1094,6 +1142,23 @@ bb1:
}
define i32 @fcmp_uge1(float %x) {
+; SDAG-X64-LABEL: fcmp_uge1:
+; SDAG-X64: ## %bb.0:
+; SDAG-X64-NEXT: movb $1, %al
+; SDAG-X64-NEXT: testb %al, %al
+; SDAG-X64-NEXT: je LBB34_1
+; SDAG-X64-NEXT: ## %bb.2: ## %bb1
+; SDAG-X64-NEXT: xorl %eax, %eax
+; SDAG-X64-NEXT: retq
+; SDAG-X64-NEXT: LBB34_1: ## %bb2
+; SDAG-X64-NEXT: movl $1, %eax
+; SDAG-X64-NEXT: retq
+;
+; FASTISEL-X64-LABEL: fcmp_uge1:
+; FASTISEL-X64: ## %bb.0:
+; FASTISEL-X64-NEXT: xorl %eax, %eax
+; FASTISEL-X64-NEXT: retq
+;
; GISEL-X64-LABEL: fcmp_uge1:
; GISEL-X64: ## %bb.0:
; GISEL-X64-NEXT: ucomiss %xmm0, %xmm0
@@ -1215,6 +1280,23 @@ bb1:
}
define i32 @fcmp_ule1(float %x) {
+; SDAG-X64-LABEL: fcmp_ule1:
+; SDAG-X64: ## %bb.0:
+; SDAG-X64-NEXT: movb $1, %al
+; SDAG-X64-NEXT: testb %al, %al
+; SDAG-X64-NEXT: je LBB38_1
+; SDAG-X64-NEXT: ## %bb.2: ## %bb1
+; SDAG-X64-NEXT: xorl %eax, %eax
+; SDAG-X64-NEXT: retq
+; SDAG-X64-NEXT: LBB38_1: ## %bb2
+; SDAG-X64-NEXT: movl $1, %eax
+; SDAG-X64-NEXT: retq
+;
+; FASTISEL-X64-LABEL: fcmp_ule1:
+; FASTISEL-X64: ## %bb.0:
+; FASTISEL-X64-NEXT: xorl %eax, %eax
+; FASTISEL-X64-NEXT: retq
+;
; GISEL-X64-LABEL: fcmp_ule1:
; GISEL-X64: ## %bb.0:
; GISEL-X64-NEXT: ucomiss %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
index 59a45d9d72f5b..869cae6ac8f65 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
@@ -31,18 +31,6 @@ define i32 @icmp_eq_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_eq_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jne LBB0_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB0_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_eq_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -89,18 +77,6 @@ define i32 @icmp_ne_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_ne_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: je LBB1_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB1_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_ne_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -147,18 +123,6 @@ define i32 @icmp_ugt_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_ugt_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jbe LBB2_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB2_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_ugt_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -205,18 +169,6 @@ define i32 @icmp_uge_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_uge_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jb LBB3_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB3_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_uge_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -263,18 +215,6 @@ define i32 @icmp_ult_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_ult_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jae LBB4_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB4_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_ult_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -321,18 +261,6 @@ define i32 @icmp_ule_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_ule_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: ja LBB5_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB5_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_ule_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -379,18 +307,6 @@ define i32 @icmp_sgt_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_sgt_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jle LBB6_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB6_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_sgt_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -437,18 +353,6 @@ define i32 @icmp_sge_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_sge_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jl LBB7_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB7_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_sge_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -495,18 +399,6 @@ define i32 @icmp_slt_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_slt_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jge LBB8_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB8_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_slt_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -553,18 +445,6 @@ define i32 @icmp_sle_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-; X86-LABEL: icmp_sle_2:
-; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl {{[0-9]+\(%esp\), %eax|%eax, [0-9]+\(%esp\)}}
-; X86-NEXT: jg LBB9_1
-; X86-NEXT: ## %bb.2: ## %bb1
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: retl
-; X86-NEXT: LBB9_1: ## %bb2
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: retl
-;
; GISEL-X86-LABEL: icmp_sle_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -594,15 +474,15 @@ define i32 @icmp_eq(i32 %x) {
; SDAG-NEXT: je LBB10_1
; SDAG-NEXT: ## %bb.2: ## %bb1
; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
; SDAG-NEXT: LBB10_1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_eq:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: xorl %eax, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_eq:
; GISEL-X64: ## %bb.0:
@@ -643,18 +523,16 @@ define i32 @icmp_ne(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-; SDAG-NEXT: je LBB11_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
-; SDAG-NEXT: LBB11_1: ## %bb2
+; SDAG-NEXT: jne LBB11_2
+; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: LBB11_2: ## %bb1
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_ne:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: movl $1, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_ne:
; GISEL-X64: ## %bb.0:
@@ -695,18 +573,16 @@ define i32 @icmp_ugt(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-; SDAG-NEXT: je LBB12_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
-; SDAG-NEXT: LBB12_1: ## %bb2
+; SDAG-NEXT: jne LBB12_2
+; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: LBB12_2: ## %bb1
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_ugt:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: movl $1, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_ugt:
; GISEL-X64: ## %bb.0:
@@ -750,15 +626,15 @@ define i32 @icmp_uge(i32 %x) {
; SDAG-NEXT: je LBB13_1
; SDAG-NEXT: ## %bb.2: ## %bb1
; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
; SDAG-NEXT: LBB13_1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
;
-; FASTISEL-X64-LABEL: icmp_uge:
-; FASTISEL-X64: ## %bb.0:
-; FASTISEL-X64-NEXT: xorl %eax, %eax
-; FASTISEL-X64-NEXT: retq
+; FASTISEL-LABEL: icmp_uge:
+; FASTISEL: ## %bb.0:
+; FASTISEL-NEXT: xorl %eax, %eax
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_uge:
; GISEL-X64: ## %bb.0:
@@ -786,6 +662,10 @@ define i32 @icmp_uge(i32 %x) {
; GISEL-X86-NEXT: LBB13_1: ## %bb2
; GISEL-X86-NEXT: movl $1, %eax
; GISEL-X86-NEXT: retl
+; FASTISEL-X64-LABEL: icmp_uge:
+; FASTISEL-X64: ## %bb.0:
+; FASTISEL-X64-NEXT: xorl %eax, %eax
+; FASTISEL-X64-NEXT: retq
%1 = icmp uge i32 %x, %x
br i1 %1, label %bb1, label %bb2
bb2:
@@ -799,18 +679,16 @@ define i32 @icmp_ult(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-; SDAG-NEXT: je LBB14_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
-; SDAG-NEXT: LBB14_1: ## %bb2
+; SDAG-NEXT: jne LBB14_2
+; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: LBB14_2: ## %bb1
+; SDAG-NEXT: ret{{[l|q]}}
;
-; FASTISEL-X64-LABEL: icmp_ult:
-; FASTISEL-X64: ## %bb.0:
-; FASTISEL-X64-NEXT: movl $1, %eax
-; FASTISEL-X64-NEXT: ret{{q|l}}
+; FASTISEL-LABEL: icmp_ult:
+; FASTISEL: ## %bb.0:
+; FASTISEL-NEXT: movl $1, %eax
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_ult:
; GISEL-X64: ## %bb.0:
@@ -838,6 +716,10 @@ define i32 @icmp_ult(i32 %x) {
; GISEL-X86-NEXT: LBB14_1: ## %bb2
; GISEL-X86-NEXT: movl $1, %eax
; GISEL-X86-NEXT: retl
+; FASTISEL-X64-LABEL: icmp_ult:
+; FASTISEL-X64: ## %bb.0:
+; FASTISEL-X64-NEXT: movl $1, %eax
+; FASTISEL-X64-NEXT: ret{{q|l}}
%1 = icmp ult i32 %x, %x
br i1 %1, label %bb1, label %bb2
bb2:
@@ -854,15 +736,15 @@ define i32 @icmp_ule(i32 %x) {
; SDAG-NEXT: je LBB15_1
; SDAG-NEXT: ## %bb.2: ## %bb1
; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
; SDAG-NEXT: LBB15_1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_ule:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: xorl %eax, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_ule:
; GISEL-X64: ## %bb.0:
@@ -903,18 +785,16 @@ define i32 @icmp_sgt(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-; SDAG-NEXT: je LBB16_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
-; SDAG-NEXT: LBB16_1: ## %bb2
+; SDAG-NEXT: jne LBB16_2
+; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: LBB16_2: ## %bb1
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_sgt:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: movl $1, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_sgt:
; GISEL-X64: ## %bb.0:
@@ -958,15 +838,15 @@ define i32 @icmp_sge(i32 %x) {
; SDAG-NEXT: je LBB17_1
; SDAG-NEXT: ## %bb.2: ## %bb1
; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
; SDAG-NEXT: LBB17_1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_sge:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: xorl %eax, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_sge:
; GISEL-X64: ## %bb.0:
@@ -1007,18 +887,16 @@ define i32 @icmp_slt(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-; SDAG-NEXT: je LBB18_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
-; SDAG-NEXT: LBB18_1: ## %bb2
+; SDAG-NEXT: jne LBB18_2
+; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: LBB18_2: ## %bb1
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_slt:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: movl $1, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_slt:
; GISEL-X64: ## %bb.0:
@@ -1062,15 +940,15 @@ define i32 @icmp_sle(i32 %x) {
; SDAG-NEXT: je LBB19_1
; SDAG-NEXT: ## %bb.2: ## %bb1
; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
; SDAG-NEXT: LBB19_1: ## %bb2
; SDAG-NEXT: movl $1, %eax
-; SDAG-NEXT: ret{{q|l}}
+; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_sle:
; FASTISEL: ## %bb.0:
; FASTISEL-NEXT: xorl %eax, %eax
-; FASTISEL-NEXT: ret{{q|l}}
+; FASTISEL-NEXT: ret{{[l|q]}}
;
; GISEL-X64-LABEL: icmp_sle:
; GISEL-X64: ## %bb.0:
@@ -1105,3 +983,5 @@ bb2:
bb1:
ret i32 0
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X86: {{.*}}
diff --git a/llvm/test/CodeGen/X86/pr36602.ll b/llvm/test/CodeGen/X86/pr36602.ll
index fa2e05e863336..fe9490efb838c 100644
--- a/llvm/test/CodeGen/X86/pr36602.ll
+++ b/llvm/test/CodeGen/X86/pr36602.ll
@@ -9,7 +9,6 @@ define i32 @fn2() {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1: # %bb1
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB0_2: # %bb2
; CHECK-NEXT: movl $1, %eax
diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index c3c96e8228797..c7b464cbe8a92 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -128,7 +128,6 @@ define dso_local void @fn() {
; CHECK-NEXT: jne .LBB0_15
; CHECK-NEXT: # %bb.14: # %if.then31
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: xorl %ebp, %ebp
; CHECK-NEXT: jmp .LBB0_15
; CHECK-NEXT: .p2align 4
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c24823538aa14..2f605de07040f 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -288,7 +288,6 @@ define ptr @SyFgets(ptr %line, i64 %length, i64 %fid) {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je LBB0_54
; CHECK-NEXT: ## %bb.50: ## %for.body1664.lr.ph
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebp ## 4-byte Reload
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index 426587a84ce17..cfe1e099e0e0c 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -754,7 +754,6 @@ define i256 @PR25498(i256 %a) nounwind {
; ILP-NEXT: testq %r11, %r11
; ILP-NEXT: cmovneq %rdx, %rcx
; ILP-NEXT: orq $128, %rcx
-; ILP-NEXT: xorl %edi, %edi
; ILP-NEXT: orq %r10, %r9
; ILP-NEXT: cmovneq %rsi, %rcx
; ILP-NEXT: jmp .LBB4_3
@@ -803,7 +802,6 @@ define i256 @PR25498(i256 %a) nounwind {
; HYBRID-NEXT: orq $128, %rcx
; HYBRID-NEXT: orq %r10, %r9
; HYBRID-NEXT: cmovneq %rdx, %rcx
-; HYBRID-NEXT: xorl %edi, %edi
; HYBRID-NEXT: jmp .LBB4_3
; HYBRID-NEXT: .LBB4_1:
; HYBRID-NEXT: movl $256, %ecx # imm = 0x100
@@ -850,7 +848,6 @@ define i256 @PR25498(i256 %a) nounwind {
; BURR-NEXT: orq $128, %rcx
; BURR-NEXT: orq %r10, %r9
; BURR-NEXT: cmovneq %rdx, %rcx
-; BURR-NEXT: xorl %edi, %edi
; BURR-NEXT: jmp .LBB4_3
; BURR-NEXT: .LBB4_1:
; BURR-NEXT: movl $256, %ecx # imm = 0x100
@@ -897,7 +894,6 @@ define i256 @PR25498(i256 %a) nounwind {
; SRC-NEXT: orq $128, %rcx
; SRC-NEXT: orq %r10, %r9
; SRC-NEXT: cmovneq %rdx, %rcx
-; SRC-NEXT: xorl %edi, %edi
; SRC-NEXT: jmp .LBB4_3
; SRC-NEXT: .LBB4_1:
; SRC-NEXT: movl $256, %ecx # imm = 0x100
@@ -944,7 +940,6 @@ define i256 @PR25498(i256 %a) nounwind {
; LIN-NEXT: cmoveq %rsi, %rcx
; LIN-NEXT: orq %r10, %r9
; LIN-NEXT: cmoveq %rdx, %rcx
-; LIN-NEXT: xorl %edi, %edi
; LIN-NEXT: jmp .LBB4_3
; LIN-NEXT: .LBB4_1:
; LIN-NEXT: movl $256, %ecx # imm = 0x100
diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index d9ab2f7d1f5fb..b5e9e987188ea 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -245,11 +245,9 @@ define fastcc void @c_expand_expr_stmt(ptr %expr) nounwind {
; CHECK-NEXT: jne .LBB3_9
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movzbl 0, %ebx
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_8
; CHECK-NEXT: # %bb.2: # %bb.i
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_8
; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
@@ -519,7 +517,6 @@ define dso_local void @two() nounwind optsize {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB7_1
; CHECK-NEXT: # %bb.2: # %return
@@ -560,7 +557,6 @@ define dso_local void @two_pgso() nounwind !prof !14 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB8_1
; CHECK-NEXT: # %bb.2: # %return
@@ -603,7 +599,6 @@ define dso_local void @two_minsize() nounwind minsize {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB9_1
; CHECK-NEXT: # %bb.2: # %return
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index ee59169498d27..d4df6028b160f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -270,11 +270,7 @@ define i64 @PR55050() {
; X86: # %bb.0: # %entry
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testb %al, %al
-; X86-NEXT: jne .LBB15_2
-; X86-NEXT: # %bb.1: # %if
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: .LBB15_2: # %exit
-; X86-NEXT: movl %eax, %edx
+; X86-NEXT: movl %eax, %edx
; X86-NEXT: retl
;
; X64-LABEL: PR55050:
diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index b02da217e76b2..ca3d54ac0899f 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -365,7 +365,6 @@ define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-NEXT: jl .LBB3_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: movl %edi, %ecx
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl $1, %edx
; CHECK-NEXT: .LBB3_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -387,7 +386,6 @@ define i32 @MaxIndex_unpredictable(i32 %n, ptr nocapture readonly %a) #0 {
; CHECK-FORCEALL-NEXT: jl .LBB3_3
; CHECK-FORCEALL-NEXT: # %bb.1: # %for.body.preheader
; CHECK-FORCEALL-NEXT: movl %edi, %ecx
-; CHECK-FORCEALL-NEXT: xorl %eax, %eax
; CHECK-FORCEALL-NEXT: movl $1, %edx
; CHECK-FORCEALL-NEXT: .LBB3_2: # %for.body
; CHECK-FORCEALL-NEXT: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/X86/zext-extract_subreg.ll b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
index 877f11632b768..f31e099cf37b7 100644
--- a/llvm/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
@@ -8,7 +8,6 @@ define void @t() nounwind ssp {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_6
; CHECK-NEXT: ## %bb.1: ## %if.end.i
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je LBB0_2
; CHECK-NEXT: LBB0_6: ## %return
@@ -20,11 +19,9 @@ define void @t() nounwind ssp {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_5
; CHECK-NEXT: ## %bb.3: ## %cond.true190
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne LBB0_5
; CHECK-NEXT: ## %bb.4: ## %cond.true225
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: LBB0_5: ## %cond.false205
; CHECK-NEXT: ud2
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index 429bee4195fa9..495f3c50b589f 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -96,7 +96,6 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index 842fd8836da7e..0cb936cbf77d3 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -37,7 +37,6 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected
index e74cc7c452034..e77de3d7bec7a 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected
@@ -89,7 +89,6 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: callq OUTLINED_FUNCTION_0
; CHECK-NEXT: .LBB0_6:
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: addq $20, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected
index 96f3ac99e21bb..bcd78971b065f 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected
@@ -30,7 +30,6 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: callq OUTLINED_FUNCTION_0
; CHECK-NEXT: .LBB0_6:
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: addq $20, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
From 9a1de88e67ba34c8f6d1cbcece3ed3173a6ce9b2 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 13 May 2025 19:16:48 +0530
Subject: [PATCH 2/7] Remove the isPseudo check
---
llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c3b6115338f05..43945bfb9e532 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -191,7 +191,7 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
DefedReg = MO.getReg();
else
return false;
- } else if (MI->isPseudo() && MI->isMoveImmediate()) {
+ } else if (MI->isMoveImmediate()) {
return DefedReg.isValid();
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
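With the isPseudo() restriction dropped, the relaxed check now applies to any
move-immediate the target emits, not only pseudos. The practical effect shows
up in the X86 test updates above, where a second zeroing of a register that is
already zero gets deleted. Taking the two() function in tail-opts.ll from this
patch as an example, the checked block used to be:

xorl %eax, %eax
testb %al, %al
xorl %eax, %eax    # redundant: %eax is already zero, removed by this patch
testb %al, %al
je .LBB7_1

and the second xorl is gone after the cleanup. The removed
v_mov_b32_e32 ..., 0 lines in the AMDGPU tests are largely the same pattern on
that target.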
From 2ab66a2a6342e26267fd21bd64ef9a18797cbdad Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Tue, 13 May 2025 23:56:00 +0530
Subject: [PATCH 3/7] Fix the test cases
---
llvm/test/CodeGen/X86/isel-brcond-fcmp.ll | 24 -------
llvm/test/CodeGen/X86/isel-brcond-icmp.ll | 83 -----------------------
2 files changed, 107 deletions(-)
diff --git a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
index 808e4949e7726..cfafe6500f7f4 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-fcmp.ll
@@ -533,19 +533,10 @@ define i32 @fcmp_ogt1(float %x) {
; SDAG-X64: ## %bb.0:
; SDAG-X64-NEXT: xorl %eax, %eax
; SDAG-X64-NEXT: testb %al, %al
-<<<<<<< HEAD
; SDAG-X64-NEXT: jne LBB16_2
; SDAG-X64-NEXT: ## %bb.1: ## %bb2
; SDAG-X64-NEXT: movl $1, %eax
; SDAG-X64-NEXT: LBB16_2: ## %bb1
-=======
-; SDAG-X64-NEXT: je LBB16_1
-; SDAG-X64-NEXT: ## %bb.2: ## %bb1
-; SDAG-X64-NEXT: xorl %eax, %eax
-; SDAG-X64-NEXT: retq
-; SDAG-X64-NEXT: LBB16_1: ## %bb2
-; SDAG-X64-NEXT: movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-X64-NEXT: retq
;
; FASTISEL-X64-LABEL: fcmp_ogt1:
@@ -565,8 +556,6 @@ define i32 @fcmp_ogt1(float %x) {
; GISEL-X64-NEXT: LBB16_1: ## %bb2
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
-
-
%1 = fcmp ogt float %x, %x
br i1 %1, label %bb1, label %bb2
bb2:
@@ -680,19 +669,10 @@ define i32 @fcmp_olt1(float %x) {
; SDAG-X64: ## %bb.0:
; SDAG-X64-NEXT: xorl %eax, %eax
; SDAG-X64-NEXT: testb %al, %al
-<<<<<<< HEAD
; SDAG-X64-NEXT: jne LBB20_2
; SDAG-X64-NEXT: ## %bb.1: ## %bb2
; SDAG-X64-NEXT: movl $1, %eax
; SDAG-X64-NEXT: LBB20_2: ## %bb1
-=======
-; SDAG-X64-NEXT: je LBB20_1
-; SDAG-X64-NEXT: ## %bb.2: ## %bb1
-; SDAG-X64-NEXT: xorl %eax, %eax
-; SDAG-X64-NEXT: retq
-; SDAG-X64-NEXT: LBB20_1: ## %bb2
-; SDAG-X64-NEXT: movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-X64-NEXT: retq
;
; FASTISEL-X64-LABEL: fcmp_olt1:
@@ -827,10 +807,6 @@ define i32 @fcmp_one1(float %x) {
; SDAG-X64-NEXT: testb %al, %al
; SDAG-X64-NEXT: je LBB24_1
; SDAG-X64-NEXT: ## %bb.2: ## %bb1
-<<<<<<< HEAD
-=======
-; SDAG-X64-NEXT: xorl %eax, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-X64-NEXT: retq
; SDAG-X64-NEXT: LBB24_1: ## %bb2
; SDAG-X64-NEXT: movl $1, %eax
diff --git a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
index a9343909d43b5..80822f53ea702 100644
--- a/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
+++ b/llvm/test/CodeGen/X86/isel-brcond-icmp.ll
@@ -31,8 +31,6 @@ define i32 @icmp_eq_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_eq_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -57,7 +55,6 @@ define i32 @icmp_eq_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_eq_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -104,8 +101,6 @@ define i32 @icmp_ne_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_ne_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -130,7 +125,6 @@ define i32 @icmp_ne_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_ne_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -177,8 +171,6 @@ define i32 @icmp_ugt_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_ugt_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -203,7 +195,6 @@ define i32 @icmp_ugt_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_ugt_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -250,8 +241,6 @@ define i32 @icmp_uge_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_uge_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -276,7 +265,6 @@ define i32 @icmp_uge_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_uge_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -323,8 +311,6 @@ define i32 @icmp_ult_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_ult_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -349,7 +335,6 @@ define i32 @icmp_ult_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_ult_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -396,8 +381,6 @@ define i32 @icmp_ule_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_ule_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -422,7 +405,6 @@ define i32 @icmp_ule_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_ule_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -469,8 +451,6 @@ define i32 @icmp_sgt_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_sgt_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -495,7 +475,6 @@ define i32 @icmp_sgt_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_sgt_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -542,8 +521,6 @@ define i32 @icmp_sge_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_sge_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -568,7 +545,6 @@ define i32 @icmp_sge_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_sge_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -615,8 +591,6 @@ define i32 @icmp_slt_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_slt_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -641,7 +615,6 @@ define i32 @icmp_slt_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_slt_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -688,8 +661,6 @@ define i32 @icmp_sle_2(i32 %x, i32 %y) {
; GISEL-X64-NEXT: movl $1, %eax
; GISEL-X64-NEXT: retq
;
-<<<<<<< HEAD
-=======
; SDAG-X86-LABEL: icmp_sle_2:
; SDAG-X86: ## %bb.0:
; SDAG-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -714,7 +685,6 @@ define i32 @icmp_sle_2(i32 %x, i32 %y) {
; FASTISEL-X86-NEXT: movl $1, %eax
; FASTISEL-X86-NEXT: retl
;
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; GISEL-X86-LABEL: icmp_sle_2:
; GISEL-X86: ## %bb.0:
; GISEL-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -793,19 +763,10 @@ define i32 @icmp_ne(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-<<<<<<< HEAD
; SDAG-NEXT: jne LBB11_2
; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
; SDAG-NEXT: LBB11_2: ## %bb1
-=======
-; SDAG-NEXT: je LBB11_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{[l|q]}}
-; SDAG-NEXT: LBB11_1: ## %bb2
-; SDAG-NEXT: movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_ne:
@@ -852,19 +813,10 @@ define i32 @icmp_ugt(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-<<<<<<< HEAD
; SDAG-NEXT: jne LBB12_2
; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
; SDAG-NEXT: LBB12_2: ## %bb1
-=======
-; SDAG-NEXT: je LBB12_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{[l|q]}}
-; SDAG-NEXT: LBB12_1: ## %bb2
-; SDAG-NEXT: movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_ugt:
@@ -950,10 +902,6 @@ define i32 @icmp_uge(i32 %x) {
; GISEL-X86-NEXT: LBB13_1: ## %bb2
; GISEL-X86-NEXT: movl $1, %eax
; GISEL-X86-NEXT: retl
-; FASTISEL-X64-LABEL: icmp_uge:
-; FASTISEL-X64: ## %bb.0:
-; FASTISEL-X64-NEXT: xorl %eax, %eax
-; FASTISEL-X64-NEXT: retq
%1 = icmp uge i32 %x, %x
br i1 %1, label %bb1, label %bb2
bb2:
@@ -967,19 +915,10 @@ define i32 @icmp_ult(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-<<<<<<< HEAD
; SDAG-NEXT: jne LBB14_2
; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
; SDAG-NEXT: LBB14_2: ## %bb1
-=======
-; SDAG-NEXT: je LBB14_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{[l|q]}}
-; SDAG-NEXT: LBB14_1: ## %bb2
-; SDAG-NEXT: movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_ult:
@@ -1013,10 +952,6 @@ define i32 @icmp_ult(i32 %x) {
; GISEL-X86-NEXT: LBB14_1: ## %bb2
; GISEL-X86-NEXT: movl $1, %eax
; GISEL-X86-NEXT: retl
-; FASTISEL-X64-LABEL: icmp_ult:
-; FASTISEL-X64: ## %bb.0:
-; FASTISEL-X64-NEXT: movl $1, %eax
-; FASTISEL-X64-NEXT: ret{{q|l}}
%1 = icmp ult i32 %x, %x
br i1 %1, label %bb1, label %bb2
bb2:
@@ -1082,19 +1017,10 @@ define i32 @icmp_sgt(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-<<<<<<< HEAD
; SDAG-NEXT: jne LBB16_2
; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
; SDAG-NEXT: LBB16_2: ## %bb1
-=======
-; SDAG-NEXT: je LBB16_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{[l|q]}}
-; SDAG-NEXT: LBB16_1: ## %bb2
-; SDAG-NEXT: movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_sgt:
@@ -1193,19 +1119,10 @@ define i32 @icmp_slt(i32 %x) {
; SDAG: ## %bb.0:
; SDAG-NEXT: xorl %eax, %eax
; SDAG-NEXT: testb %al, %al
-<<<<<<< HEAD
; SDAG-NEXT: jne LBB18_2
; SDAG-NEXT: ## %bb.1: ## %bb2
; SDAG-NEXT: movl $1, %eax
; SDAG-NEXT: LBB18_2: ## %bb1
-=======
-; SDAG-NEXT: je LBB18_1
-; SDAG-NEXT: ## %bb.2: ## %bb1
-; SDAG-NEXT: xorl %eax, %eax
-; SDAG-NEXT: ret{{[l|q]}}
-; SDAG-NEXT: LBB18_1: ## %bb2
-; SDAG-NEXT: movl $1, %eax
->>>>>>> 7038d50d624d43667a7c56501b83e3bb7cc2fe3b
; SDAG-NEXT: ret{{[l|q]}}
;
; FASTISEL-LABEL: icmp_slt:
>From 9ca319001e470049be4b2f6cd136f68fe94b05c6 Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 15 May 2025 17:03:49 +0530
Subject: [PATCH 4/7] Update the logic for defs
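The def handling in isCandidate() now records the first explicit, non-dead def in DefedReg and
then tolerates additional defs only when they are ignorable: a dead implicit def (such as the
$eflags clobber of MOV32r0) or an implicit def of a super-register that covers DefedReg (such as
the implicit-def $r9 that accompanies a def of $r9d). A minimal sketch of that rule in isolation
(the helper name is made up for illustration; the in-tree isCandidate() additionally checks uses,
immediate operands and the frame register):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

static bool defsAreCleanupFriendly(const MachineInstr &MI,
                                   const TargetRegisterInfo &TRI,
                                   Register &DefedReg) {
  DefedReg = MCRegister::NoRegister;
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI.getOperand(I);
    if (!MO.isReg() || !MO.isDef())
      continue;
    if (I == 0 && !MO.isImplicit() && !MO.isDead()) {
      DefedReg = MO.getReg();     // the register the pass will track
      continue;
    }
    if (DefedReg == MCRegister::NoRegister)
      return false;               // extra def before a usable tracked def
    if (MO.isDead() && MO.isImplicit())
      continue;                   // e.g. the dead $eflags def of MOV32r0
    if (MO.isImplicit() && TRI.isSubRegister(MO.getReg(), DefedReg))
      continue;                   // e.g. implicit-def $r9 covering $r9d
    return false;                 // any other extra def disqualifies MI
  }
  return DefedReg != MCRegister::NoRegister;
}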
---
llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 19 +++++--
llvm/test/CodeGen/AArch64/vector-lrint.ll | 2 +
...tor-flatscratchinit-undefined-behavior2.ll | 1 -
llvm/test/CodeGen/AMDGPU/call-waitcnt.ll | 1 +
.../AMDGPU/cgp-addressing-modes-gfx1030.ll | 1 +
.../AMDGPU/cgp-addressing-modes-gfx908.ll | 1 +
llvm/test/CodeGen/AMDGPU/div_v2i128.ll | 12 +++++
...frame-setup-without-sgpr-to-vgpr-spills.ll | 2 +
.../AMDGPU/gfx-callable-return-types.ll | 28 +++++-----
.../local-stack-alloc-block-sp-reference.ll | 2 +
.../materialize-frame-index-sgpr.gfx10.ll | 51 -------------------
.../AMDGPU/preserve-wwm-copy-dst-reg.ll | 2 +
.../AMDGPU/required-export-priority.ll | 1 +
llvm/test/CodeGen/AMDGPU/sibling-call.ll | 15 +++---
.../AMDGPU/spill-offset-calculation.ll | 5 --
.../CodeGen/AMDGPU/spill-scavenge-offset.ll | 8 +++
.../CodeGen/X86/scheduler-backtracking.ll | 1 +
...dgpu_generated_funcs.ll.generated.expected | 1 +
...pu_generated_funcs.ll.nogenerated.expected | 1 +
19 files changed, 73 insertions(+), 81 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 43945bfb9e532..7f61aec2987b9 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -186,13 +186,24 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
- if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
+ if (MO.isDef()) {
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
- else
+ else if (i != 0 && DefedReg != MCRegister::NoRegister) {
+ if (MO.isDead() && MO.isImplicit()) {
+ continue;
+ } else if (MO.isImplicit() &&
+ MI->getParent()
+ ->getParent()
+ ->getSubtarget()
+ .getRegisterInfo()
+ ->isSubRegister(MO.getReg(), DefedReg)) {
+ continue;
+ } else {
+ return false;
+ }
+ } else
return false;
- } else if (MI->isMoveImmediate()) {
- return DefedReg.isValid();
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
} else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 602643264e7be..3092f72908200 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -1335,3 +1335,5 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
ret <32 x iXLen> %a
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-i32-GI: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
index 51caa84450ff3..9bb74a9d80264 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior2.ll
@@ -246,7 +246,6 @@ define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
; GFX8-ARCH-FLAT-NEXT: v_readlane_b32 s30, v3, 0
; GFX8-ARCH-FLAT-NEXT: s_mov_b32 s32, s33
; GFX8-ARCH-FLAT-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX8-ARCH-FLAT-NEXT: s_add_i32 s3, s33, 8
; GFX8-ARCH-FLAT-NEXT: scratch_load_dword v3, off, s3 ; 4-byte Folded Reload
; GFX8-ARCH-FLAT-NEXT: s_mov_b64 exec, s[0:1]
; GFX8-ARCH-FLAT-NEXT: s_mov_b32 s33, s2
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 6889110752ae0..9abb50651146a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -41,6 +41,7 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
+; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 98771dcb441cc..81f768f303ca1 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -31,6 +31,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
; GCN-NEXT: v_cmpx_ne_u32_e32 0, v1
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
+; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index d212d7d52d841..49370e2fbf1b6 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -33,6 +33,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
+; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_add_f32 v0, v1, s[2:3] offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index a4ae669617263..77b78f1f8a333 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -323,6 +323,8 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
+; SDAG-NEXT: v_mov_b32_e32 v12, 0
+; SDAG-NEXT: v_mov_b32_e32 v13, 0
; SDAG-NEXT: v_mov_b32_e32 v14, 0
; SDAG-NEXT: v_mov_b32_e32 v15, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1105,6 +1107,8 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22
; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22
; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
+; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v10, 0
; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1675,6 +1679,8 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
+; SDAG-NEXT: v_mov_b32_e32 v18, 0
+; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1868,6 +1874,8 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38
; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38
; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37
+; SDAG-NEXT: v_mov_b32_e32 v18, 0
+; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2554,6 +2562,8 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
+; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2727,6 +2737,8 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34
; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34
; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12
+; SDAG-NEXT: v_mov_b32_e32 v22, 0
+; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index a3c7eb8c56fb0..13884eb788d8a 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -53,6 +53,7 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0
@@ -76,6 +77,7 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
+; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index e8aa64f0b8f15..6384fdba7a45a 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3234,20 +3234,20 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
-; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
-; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
-; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 0
-; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 0
-; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 0
-; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 0
-; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 0
-; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v22, 0
-; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v24, 0
-; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v26, 0
-; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v28, 0
-; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v30, 0
-; GFX11-NEXT: v_mov_b32_e32 v31, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
+; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
+; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
+; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
+; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
+; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
+; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
+; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
+; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
+; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
+; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
+; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
+; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
+; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
; GFX11-NEXT: s_mov_b32 s1, return_72xi32 at abs32@hi
; GFX11-NEXT: s_mov_b32 s0, return_72xi32 at abs32@lo
; GFX11-NEXT: v_writelane_b32 v60, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 97b7e26d1230b..a3ebaec4811a9 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -225,10 +225,12 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; MUBUF-NEXT: ; %bb.2: ; %split
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1
+; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_movk_i32 s4, 0x4000
; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1
+; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0
; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
index 4b5a7c207055a..198e98d30c8c9 100644
--- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll
@@ -32,7 +32,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -61,7 +60,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
@@ -91,7 +89,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v1, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -155,7 +152,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -183,7 +179,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@@ -212,7 +207,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -248,7 +242,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -275,7 +268,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
@@ -302,7 +294,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v1, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4044
; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -358,7 +349,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101100
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -384,7 +374,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101100
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@@ -410,7 +399,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 {
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4044
; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -450,7 +438,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -484,7 +471,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_mov_b32 s33, s5
@@ -518,7 +504,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v1, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4044
; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_mov_b32 s33, s1
@@ -592,7 +577,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_mov_b32 s32, s33
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s7, s33, 0x101100
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
@@ -625,7 +609,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_mov_b32 s32, s33
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s7, s33, 0x101100
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
@@ -659,7 +642,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 {
; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_mov_b32 s32, s33
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s3, s33, 0x4044
; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
@@ -691,7 +673,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -715,7 +696,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
@@ -741,7 +721,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -791,7 +770,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v0, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -814,7 +792,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@@ -838,7 +815,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset()
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v0, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -865,7 +841,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -887,7 +862,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800
; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
@@ -909,7 +883,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x4040
; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -955,7 +928,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v0, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x101000
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -976,7 +948,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v0, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x101000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@@ -997,7 +968,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v0, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x4040
; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -1030,7 +1000,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -1059,7 +1028,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_mov_b32 s33, s5
@@ -1089,7 +1057,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_mov_b32 s33, s1
@@ -1149,7 +1116,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX8-NEXT: v_readlane_b32 s55, v0, 0
; GFX8-NEXT: s_mov_b32 s32, s33
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s7, s33, 0x101000
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_mov_b32 s33, s6
@@ -1177,7 +1143,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX900-NEXT: v_readlane_b32 s55, v0, 0
; GFX900-NEXT: s_mov_b32 s32, s33
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s7, s33, 0x101000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_mov_b32 s33, s6
@@ -1206,7 +1171,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp
; GFX942-NEXT: v_readlane_b32 s55, v0, 0
; GFX942-NEXT: s_mov_b32 s32, s33
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s3, s33, 0x4040
; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_mov_b32 s33, s2
@@ -1238,7 +1202,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1
-; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800
; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s5
@@ -1265,7 +1228,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1
-; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800
; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s5
; GFX10_3-NEXT: s_mov_b32 s33, s4
@@ -1291,7 +1253,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v0, 0
; GFX11-NEXT: s_xor_saveexec_b32 s1, -1
-; GFX11-NEXT: s_add_i32 s2, s33, 0x4040
; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_mov_b32 s33, s0
@@ -1347,7 +1308,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX8-NEXT: v_readlane_b32 s55, v0, 0
; GFX8-NEXT: s_mov_b32 s32, s33
; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX8-NEXT: s_add_i32 s5, s33, 0x101000
; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[6:7]
; GFX8-NEXT: s_mov_b32 s33, s4
@@ -1373,7 +1333,6 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp()
; GFX900-NEXT: v_readlane_b32 s55, v0, 0
; GFX900-NEXT: s_mov_b32 s32, s33
; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1
-; GFX900-NEXT: s_add_i32 s5, s33, 0x101000
; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[6:7]
; GFX900-NEXT: s_mov_b32 s33, s4
@@ -1433,7 +1392,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -1461,7 +1419,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
@@ -1487,7 +1444,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_readlane_b32 s55, v1, 0
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
-; GFX11-NEXT: s_add_i32 s1, s32, 0x8040
; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
@@ -1543,7 +1499,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -1570,7 +1525,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
@@ -1596,7 +1550,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset(
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_readlane_b32 s55, v1, 0
; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: s_add_i32 s2, s32, 0x8040
; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload
; GFX942-NEXT: s_mov_b64 exec, s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
@@ -1634,7 +1587,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX10_1-NEXT: ;;#ASMEND
; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800
; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3
; GFX10_1-NEXT: s_mov_b32 exec_lo, s4
@@ -1664,7 +1616,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX10_3-NEXT: ;;#ASMEND
; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0
; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1
-; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800
; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload
; GFX10_3-NEXT: s_mov_b32 exec_lo, s4
; GFX10_3-NEXT: s_waitcnt vmcnt(0)
@@ -1754,7 +1705,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: v_readlane_b32 s55, v1, 0
; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX8-NEXT: s_add_i32 s6, s32, 0x201000
; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX8-NEXT: s_mov_b64 exec, s[4:5]
; GFX8-NEXT: s_waitcnt vmcnt(0)
@@ -1783,7 +1733,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: v_readlane_b32 s55, v1, 0
; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX900-NEXT: s_add_i32 s6, s32, 0x201000
; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload
; GFX900-NEXT: s_mov_b64 exec, s[4:5]
; GFX900-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 3d6e7c532348f..2d95ec6f699dc 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -395,6 +395,7 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
+; GFX908-NEXT: s_mov_b64 s[16:17], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
@@ -742,6 +743,7 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
+; GFX908-NEXT: s_mov_b64 s[4:5], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index e145380dca59e..4aa1ddee2efe3 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -267,6 +267,7 @@ define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GCN-NEXT: v_cndmask_b32_e32 v0, 16, v2, vcc_lo
+; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: scratch_store_b32 v0, v1, off
; GCN-NEXT: scratch_load_b32 v0, off, off
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index ddb7d6b9c3936..35234236b848f 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -388,7 +388,11 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32 at gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32 at gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
+; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_writelane_b32 v40, s30, 0
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
@@ -419,9 +423,6 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
@@ -527,6 +528,10 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
+; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_mov_b32_e32 v5, 0
@@ -555,9 +560,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: v_mov_b32_e32 v28, 0
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
entry:
@@ -926,6 +928,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed at rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed at rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
index 0452c3b89e9a9..66e2e3d41d95a 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -125,7 +125,6 @@ define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: s_add_i32 s10, s32, 0x40100
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s10 ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
@@ -147,7 +146,6 @@ define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
; FLATSCR-NEXT: ;;#ASMEND
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: s_add_i32 s8, s32, 0x1004
; FLATSCR-NEXT: scratch_load_dword v0, off, s8 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: ;;#ASMSTART
@@ -466,7 +464,6 @@ define void @test_sgpr_offset_function() {
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
-; MUBUF-NEXT: s_add_i32 s4, s32, 0x40100
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8
@@ -482,7 +479,6 @@ define void @test_sgpr_offset_function() {
; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill
; FLATSCR-NEXT: ;;#ASMSTART
; FLATSCR-NEXT: ;;#ASMEND
-; FLATSCR-NEXT: s_add_i32 s0, s32, 0x1004
; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:8
@@ -586,7 +582,6 @@ define void @test_inst_offset_subregs_function() {
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: s_add_i32 s4, s32, 0x3ff00
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index e9aebeef6ea6d..50056b62b3397 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -9971,6 +9971,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
+; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x80c00
@@ -9988,6 +9989,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
+; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10005,6 +10007,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
+; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81400
@@ -10022,6 +10025,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
+; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10039,6 +10043,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
+; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81c00
@@ -10056,6 +10061,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
+; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10099,6 +10105,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[34:35]
+; GFX6-NEXT: s_mov_b64 s[34:35], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s44, 0x82c00
@@ -10158,6 +10165,7 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[4:5]
; GFX6-NEXT: s_mov_b64 s[36:37], s[0:1]
+; GFX6-NEXT: s_mov_b64 s[4:5], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s6, 0x80800
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
index cfe1e099e0e0c..3ecf4eabdf020 100644
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -940,6 +940,7 @@ define i256 @PR25498(i256 %a) nounwind {
; LIN-NEXT: cmoveq %rsi, %rcx
; LIN-NEXT: orq %r10, %r9
; LIN-NEXT: cmoveq %rdx, %rcx
+; LIN-NEXT: xorl %edi, %edi
; LIN-NEXT: jmp .LBB4_3
; LIN-NEXT: .LBB4_1:
; LIN-NEXT: movl $256, %ecx # imm = 0x100
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
index 495f3c50b589f..429bee4195fa9 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected
@@ -96,6 +96,7 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.3:
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
index 0cb936cbf77d3..842fd8836da7e 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected
@@ -37,6 +37,7 @@ define dso_local i32 @check_boundaries() #0 {
; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; CHECK-NEXT: s_cbranch_execz .LBB0_4
; CHECK-NEXT: ; %bb.3:
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
>From 33373fa7327c375e446e81268782f0e2377e8eab Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Thu, 15 May 2025 19:10:09 +0530
Subject: [PATCH 5/7] Restructure the if-else ladder
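This is intended as a pure restyle of the check added in the previous patch: the nested
if / else if / else becomes a pair of guard-style checks that continue early and fall through
to a single return false, following the LLVM coding standards' advice to use early exits and
continue to reduce nesting. The same shape on a self-contained toy loop (illustrative only,
nothing LLVM-specific):

#include <vector>

// Accept a value when either guard matches; otherwise the whole scan fails.
// This mirrors the flattened "continue, continue, return false" structure.
static bool allSmallOrEven(const std::vector<int> &Vals) {
  for (int V : Vals) {
    if (V < 10)
      continue;        // first acceptable case
    if (V % 2 == 0)
      continue;        // second acceptable case
    return false;      // anything else disqualifies the input
  }
  return true;
}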
---
llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 7f61aec2987b9..0b9f984408169 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -190,18 +190,15 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
else if (i != 0 && DefedReg != MCRegister::NoRegister) {
- if (MO.isDead() && MO.isImplicit()) {
+ if (MO.isDead() && MO.isImplicit())
continue;
- } else if (MO.isImplicit() &&
- MI->getParent()
- ->getParent()
- ->getSubtarget()
- .getRegisterInfo()
- ->isSubRegister(MO.getReg(), DefedReg)) {
+ if (MO.isImplicit() && MI->getParent()
+ ->getParent()
+ ->getSubtarget()
+ .getRegisterInfo()
+ ->isSubRegister(MO.getReg(), DefedReg))
continue;
- } else {
- return false;
- }
+ return false;
} else
return false;
} else if (MO.getReg() && MO.getReg() != FrameReg)
>From a709a64c927352f08badc715d3743df5f49cffac Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Mon, 19 May 2025 12:04:20 +0530
Subject: [PATCH 6/7] Clean up the test case and pass TRI as an argument to the
 isCandidate function
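Two small cleanups: the autogenerated-prefix note that crept into vector-lrint.ll is dropped
again, and isCandidate() now takes the TargetRegisterInfo that the pass already caches instead
of re-deriving it from the instruction on every operand, with the check switched to
regsOverlap() for now. A sketch of the lookup chain this removes (hypothetical helper shown
only for contrast; the pass member TRI is assumed to be initialized in runOnMachineFunction
from MF.getSubtarget().getRegisterInfo()):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;

// The per-operand lookup chain that this patch deletes from isCandidate():
// instruction -> basic block -> function -> subtarget -> register info.
static const TargetRegisterInfo *regInfoFromInstr(const MachineInstr &MI) {
  return MI.getParent()->getParent()->getSubtarget().getRegisterInfo();
}

// After this patch the caller simply passes the cached pointer instead:
//   bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg, TRI);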
---
llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 10 +++-------
llvm/test/CodeGen/AArch64/vector-lrint.ll | 2 --
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 0b9f984408169..4bb7bdffad218 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -178,7 +178,7 @@ void MachineLateInstrsCleanup::removeRedundantDef(MachineInstr *MI) {
// and the only reg it may use is FrameReg. Typically this is an immediate
// load or a load-address instruction.
static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
- Register FrameReg) {
+ Register FrameReg, const TargetRegisterInfo *TRI) {
DefedReg = MCRegister::NoRegister;
bool SawStore = true;
if (!MI->isSafeToMove(SawStore) || MI->isImplicitDef() || MI->isInlineAsm())
@@ -192,11 +192,7 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
else if (i != 0 && DefedReg != MCRegister::NoRegister) {
if (MO.isDead() && MO.isImplicit())
continue;
- if (MO.isImplicit() && MI->getParent()
- ->getParent()
- ->getSubtarget()
- .getRegisterInfo()
- ->isSubRegister(MO.getReg(), DefedReg))
+ if (MO.isImplicit() && TRI->regsOverlap(MO.getReg(), DefedReg))
continue;
return false;
} else
@@ -245,7 +241,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
}
Register DefedReg;
- bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
+ bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg, TRI);
// Check for an earlier identical and reusable instruction.
if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) {
diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 3092f72908200..602643264e7be 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -1335,5 +1335,3 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
ret <32 x iXLen> %a
}
declare <32 x iXLen> @llvm.lrint.v32iXLen.v32f64(<32 x double>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-i32-GI: {{.*}}
>From f7792d2d0cbea0475ff468e280cb832315c2c6bc Mon Sep 17 00:00:00 2001
From: Rohit Aggarwal <Rohit.Aggarwal at amd.com>
Date: Mon, 19 May 2025 17:40:21 +0530
Subject: [PATCH 7/7] Updating the comments and isSubRegister call
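Compared with the regsOverlap() call from the previous commit, isSubRegister() is deliberately
narrower: regsOverlap(A, B) is symmetric and also true when A == B, whereas isSubRegister(A, B)
holds only when B is a sub-register of A. For the MOV32r0 example in the new comments, the
implicit-def $r9 is accepted because $r9d (the tracked DefedReg) is one of its sub-registers.
A minimal sketch of the resulting extra-def filter (hypothetical helper name, mirroring the
final code and assuming the usual CodeGen headers):

#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

// True when an additional def operand does not block the cleanup: either a
// dead implicit def (e.g. $eflags) or an implicit def of a register that
// contains the tracked DefedReg as a sub-register (e.g. $r9 over $r9d).
static bool isIgnorableExtraDef(const MachineOperand &MO, Register DefedReg,
                                const TargetRegisterInfo &TRI) {
  if (MO.isDead() && MO.isImplicit())
    return true;
  return MO.isImplicit() && TRI.isSubRegister(MO.getReg(), DefedReg);
}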
---
llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index 4bb7bdffad218..afa9d87507738 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -187,12 +187,23 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
if (MO.isDef()) {
+        // To set DefedReg, check that the first MachineOperand is a def that
+        // is neither implicit nor dead.
+        // For example:
+        // renamable $r9d = MOV32r0 implicit-def dead $eflags, implicit-def $r9
+        // The first operand is $r9d; it is neither an implicit def nor dead,
+        // so it is valid and is recorded in DefedReg.
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
- else if (i != 0 && DefedReg != MCRegister::NoRegister) {
+        // If DefedReg holds a valid register, check the remaining operands.
+ else if (DefedReg != MCRegister::NoRegister) {
+          // If the MachineOperand is dead and implicit, continue to the
+          // next operand.
if (MO.isDead() && MO.isImplicit())
continue;
- if (MO.isImplicit() && TRI->regsOverlap(MO.getReg(), DefedReg))
+          // If the MachineOperand is implicit and DefedReg is one of its
+          // sub-registers, continue to the next operand.
+ if (MO.isImplicit() && TRI->isSubRegister(MO.getReg(), DefedReg))
continue;
return false;
} else