[llvm] [Codegen][Backend] Remove redundant pseudo mov instruction (PR #139716)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 13 05:04:20 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Rohit Aggarwal (rohitaggarwal007)
<details>
<summary>Changes</summary>
Remove redundant pseudo mov instructions after PostRegalloc.
---
Patch is 52.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139716.diff
33 Files Affected:
- (modified) llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp (+3-1)
- (modified) llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/call-waitcnt.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/captured-frame-index.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll (+1-2)
- (modified) llvm/test/CodeGen/AMDGPU/div_v2i128.ll (-12)
- (modified) llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll (+14-14)
- (modified) llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/required-export-priority.ll (-1)
- (modified) llvm/test/CodeGen/AMDGPU/sibling-call.ll (+6-9)
- (modified) llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll (-8)
- (modified) llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll (-1)
- (modified) llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll (-2)
- (modified) llvm/test/CodeGen/X86/avx-load-store.ll (-1)
- (modified) llvm/test/CodeGen/X86/avx512-i1test.ll (+7-7)
- (modified) llvm/test/CodeGen/X86/isel-brcond-fcmp.ll (+94-12)
- (modified) llvm/test/CodeGen/X86/isel-brcond-icmp.ll (+56-176)
- (modified) llvm/test/CodeGen/X86/pr36602.ll (-1)
- (modified) llvm/test/CodeGen/X86/pr38795.ll (-1)
- (modified) llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll (-1)
- (modified) llvm/test/CodeGen/X86/scheduler-backtracking.ll (-5)
- (modified) llvm/test/CodeGen/X86/tail-opts.ll (-5)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll (+1-5)
- (modified) llvm/test/CodeGen/X86/x86-cmov-converter.ll (-2)
- (modified) llvm/test/CodeGen/X86/zext-extract_subreg.ll (-3)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected (-1)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected (-1)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.generated.expected (-1)
- (modified) llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/x86_generated_funcs.ll.nogenerated.expected (-1)
``````````diff
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c8c8ed99d93ea..c3b6115338f05 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -186,11 +186,13 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
- if (MO.isDef()) {
+ if (MO.isDef() && DefedReg == MCRegister::NoRegister) {
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
else
return false;
+ } else if (MI->isPseudo() && MI->isMoveImmediate()) {
+ return DefedReg.isValid();
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
} else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index 0f208f8ed9052..08346f1a857eb 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -80,7 +80,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: mov x1, xzr
-; CHECK-NEXT: mov w8, #1 ; =0x1
; CHECK-NEXT: stp xzr, xzr, [sp]
; CHECK-NEXT: stp x8, xzr, [sp, #16]
; CHECK-NEXT: bl _fprintf
diff --git a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
index 9abb50651146a..6889110752ae0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-waitcnt.ll
@@ -41,7 +41,6 @@ define amdgpu_kernel void @call_memory_no_dep(ptr addrspace(1) %ptr, i32) #0 {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_store_dword v0, v0, s[6:7]
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 963b3a55259fa..852ffd5fa7183 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -113,8 +113,7 @@ define amdgpu_kernel void @stored_fi_to_fi() #0 {
; GCN-LABEL: {{^}}stored_fi_to_global:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
-; GCN: buffer_store_dword [[FI]]
+; GCN: buffer_store_dword v{{[0-9]+}}
define amdgpu_kernel void @stored_fi_to_global(ptr addrspace(1) %ptr) #0 {
%tmp = alloca float, addrspace(5)
store float 0.0, ptr addrspace(5) %tmp
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 81f768f303ca1..98771dcb441cc 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -31,7 +31,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(ptr add
; GCN-NEXT: v_cmpx_ne_u32_e32 0, v1
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index 49370e2fbf1b6..d212d7d52d841 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -33,7 +33,6 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_add_f32 v0, v1, s[2:3] offset:28
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 0f9407b77aa83..8f92ee42c066a 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -72,8 +72,7 @@ done:
; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
-; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
-; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
+; GFX9: global_load_sbyte {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; GCN: {{^}}.LBB2_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(ptr addrspace(1) %out, ptr addrspace(1) %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
index 77b78f1f8a333..a4ae669617263 100644
--- a/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_v2i128.ll
@@ -323,8 +323,6 @@ define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
-; SDAG-NEXT: v_mov_b32_e32 v12, 0
-; SDAG-NEXT: v_mov_b32_e32 v13, 0
; SDAG-NEXT: v_mov_b32_e32 v14, 0
; SDAG-NEXT: v_mov_b32_e32 v15, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1107,8 +1105,6 @@ define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22
; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22
; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v10, 0
; SDAG-NEXT: v_mov_b32_e32 v11, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1679,8 +1675,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v22, 0
; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -1874,8 +1868,6 @@ define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38
; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38
; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37
-; SDAG-NEXT: v_mov_b32_e32 v18, 0
-; SDAG-NEXT: v_mov_b32_e32 v19, 0
; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2562,8 +2554,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30
; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8
-; SDAG-NEXT: v_mov_b32_e32 v20, 0
-; SDAG-NEXT: v_mov_b32_e32 v21, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
@@ -2737,8 +2727,6 @@ define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34
; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34
; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12
-; SDAG-NEXT: v_mov_b32_e32 v22, 0
-; SDAG-NEXT: v_mov_b32_e32 v23, 0
; SDAG-NEXT: v_mov_b32_e32 v24, 0
; SDAG-NEXT: v_mov_b32_e32 v25, 0
; SDAG-NEXT: s_mov_b64 s[10:11], 0
diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
index 13884eb788d8a..a3c7eb8c56fb0 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll
@@ -53,7 +53,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v0, s31, 0
@@ -77,7 +76,6 @@ define void @callee_with_stack_and_call() #0 {
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0)
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5]
-; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec
; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1
; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16
; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
index 6384fdba7a45a..e8aa64f0b8f15 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll
@@ -3234,20 +3234,20 @@ define amdgpu_gfx void @call_72xi32() #1 {
; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
-; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0
-; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0
-; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v8, 0
-; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v10, 0
-; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v12, 0
-; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v14, 0
-; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v16, 0
-; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v18, 0
-; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v20, 0
-; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v22, 0
-; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v24, 0
-; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0
-; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0
-; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
+; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
+; GFX11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
+; GFX11-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_mov_b32 v12, 0
+; GFX11-NEXT: v_dual_mov_b32 v13, 0 :: v_dual_mov_b32 v14, 0
+; GFX11-NEXT: v_dual_mov_b32 v15, 0 :: v_dual_mov_b32 v16, 0
+; GFX11-NEXT: v_dual_mov_b32 v17, 0 :: v_dual_mov_b32 v18, 0
+; GFX11-NEXT: v_dual_mov_b32 v19, 0 :: v_dual_mov_b32 v20, 0
+; GFX11-NEXT: v_dual_mov_b32 v21, 0 :: v_dual_mov_b32 v22, 0
+; GFX11-NEXT: v_dual_mov_b32 v23, 0 :: v_dual_mov_b32 v24, 0
+; GFX11-NEXT: v_dual_mov_b32 v25, 0 :: v_dual_mov_b32 v26, 0
+; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v28, 0
+; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v30, 0
+; GFX11-NEXT: v_mov_b32_e32 v31, 0
; GFX11-NEXT: s_mov_b32 s1, return_72xi32 at abs32@hi
; GFX11-NEXT: s_mov_b32 s0, return_72xi32 at abs32@lo
; GFX11-NEXT: v_writelane_b32 v60, s31, 1
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index a3ebaec4811a9..97b7e26d1230b 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -225,12 +225,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out
; MUBUF-NEXT: ; %bb.2: ; %split
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_movk_i32 s4, 0x4000
; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1
-; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000
; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0
; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
index 2d95ec6f699dc..3d6e7c532348f 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll
@@ -395,7 +395,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[16:17]
-; GFX908-NEXT: s_mov_b64 s[16:17], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168
; GFX908-NEXT: v_writelane_b32 v2, s31, 0
@@ -743,7 +742,6 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: s_waitcnt vmcnt(0)
; GFX908-NEXT: s_mov_b64 exec, s[4:5]
-; GFX908-NEXT: s_mov_b64 s[4:5], exec
; GFX908-NEXT: s_mov_b64 exec, 1
; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168
; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index 4aa1ddee2efe3..e145380dca59e 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -267,7 +267,6 @@ define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GCN-NEXT: v_cndmask_b32_e32 v0, 16, v2, vcc_lo
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: scratch_store_b32 v0, v1, off
; GCN-NEXT: scratch_load_b32 v0, off, off
; GCN-NEXT: v_mov_b32_e32 v1, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 35234236b848f..ddb7d6b9c3936 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -388,11 +388,7 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: s_add_u32 s4, s4, i32_fastcc_i32_i32_a32i32 at gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, i32_fastcc_i32_i32_a32i32 at gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_writelane_b32 v40, s30, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
@@ -423,6 +419,9 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 {
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_readlane_b32 s31, v40, 1
@@ -528,10 +527,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:48
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
-; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
-; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_mov_b32_e32 v5, 0
@@ -560,6 +555,9 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg
; GCN-NEXT: v_mov_b32_e32 v28, 0
; GCN-NEXT: v_mov_b32_e32 v29, 0
; GCN-NEXT: v_mov_b32_e32 v30, 0
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4
+; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
entry:
@@ -928,7 +926,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64
; GCN-NEXT: s_add_u32 s16, s16, void_fastcc_byval_and_stack_passed at rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, void_fastcc_byval_and_stack_passed at rel32@hi+12
; GCN-NEXT: v_mov_b32_e32 v0, 0
-; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index 50056b62b3397..e9aebeef6ea6d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -9971,7 +9971,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x80c00
@@ -9989,7 +9988,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10007,7 +10005,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81400
@@ -10025,7 +10022,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10043,7 +10039,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 0xff
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_mov_b32 s34, 0x81c00
@@ -10061,7 +10056,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr addrspac
; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: s_mov_b64 exec, s[6:7]
-; GFX6-NEXT: s_mov_b64 s[6:7], exec
; GFX6-NEXT: s_mov_b64 exec, 15
; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0
; GFX6-NEXT: s_waitcnt expcnt(0)
@@ -10105,7 +10099,6 @@ define amdgpu_kernel void @test_limited_sgpr(ptr addrspace(1) %out, ptr ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/139716
More information about the llvm-commits
mailing list