[llvm] e30301a - [MachineCopyPropagation] Make use of lane mask info in basic block liveins (#140248)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 16 08:54:37 PDT 2025


Author: Jay Foad
Date: 2025-05-16T16:54:34+01:00
New Revision: e30301aaecd24141cbdcd740aa4c16c0d9b05307

URL: https://github.com/llvm/llvm-project/commit/e30301aaecd24141cbdcd740aa4c16c0d9b05307
DIFF: https://github.com/llvm/llvm-project/commit/e30301aaecd24141cbdcd740aa4c16c0d9b05307.diff

LOG: [MachineCopyPropagation] Make use of lane mask info in basic block liveins (#140248)

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineCopyPropagation.cpp
    llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 6eab87c1292e0..6af3154b9ed13 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -553,9 +553,12 @@ void MachineCopyPropagation::readSuccessorLiveIns(
   // If a copy result is livein to a successor, it is not dead.
   for (const MachineBasicBlock *Succ : MBB.successors()) {
     for (const auto &LI : Succ->liveins()) {
-      for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
-        if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI))
-          MaybeDeadCopies.remove(Copy);
+      for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
+        auto [Unit, Mask] = *U;
+        if ((Mask & LI.LaneMask).any()) {
+          if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI))
+            MaybeDeadCopies.remove(Copy);
+        }
       }
     }
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 9775a37276dfd..cbceb0885e8db 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -875,7 +875,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_add_rtn_u32 v0, v4, v0
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -980,7 +979,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    s_cbranch_execz .LBB2_2
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_add_rtn_u32 v0, v4, v0
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -4282,7 +4280,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    s_cbranch_execz .LBB10_2
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_sub_rtn_u32 v0, v4, v0
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -4387,7 +4384,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    s_cbranch_execz .LBB10_2
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_sub_rtn_u32 v0, v4, v0
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -6691,7 +6687,6 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_and_rtn_b32 v0, v0, v4
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -6796,7 +6791,6 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_and_rtn_b32 v0, v0, v4
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -8052,7 +8046,6 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    s_cbranch_execz .LBB17_2
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_or_rtn_b32 v0, v4, v0
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -8157,7 +8150,6 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    s_cbranch_execz .LBB17_2
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_or_rtn_b32 v0, v4, v0
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -9412,7 +9404,6 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    s_cbranch_execz .LBB19_2
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_xor_rtn_b32 v0, v4, v0
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -9517,7 +9508,6 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    s_cbranch_execz .LBB19_2
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_xor_rtn_b32 v0, v4, v0
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -10772,7 +10762,6 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_max_rtn_i32 v0, v0, v4
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -10877,7 +10866,6 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_max_rtn_i32 v0, v0, v4
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -12600,7 +12588,6 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_min_rtn_i32 v0, v0, v4
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -12705,7 +12692,6 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_min_rtn_i32 v0, v0, v4
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -14428,7 +14414,6 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:    s_cbranch_execz .LBB27_2
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_max_rtn_u32 v0, v4, v0
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -14533,7 +14518,6 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:    s_cbranch_execz .LBB27_2
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_max_rtn_u32 v0, v4, v0
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv
@@ -16243,7 +16227,6 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
 ; GFX1064_DPP-NEXT:  ; %bb.1:
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1064_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1064_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1064_DPP-NEXT:    ds_min_rtn_u32 v0, v0, v4
 ; GFX1064_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064_DPP-NEXT:    buffer_gl0_inv
@@ -16348,7 +16331,6 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
 ; GFX1164_DPP-NEXT:  ; %bb.1:
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX1164_DPP-NEXT:    v_mov_b32_e32 v4, s6
-; GFX1164_DPP-NEXT:    s_mov_b32 s3, s6
 ; GFX1164_DPP-NEXT:    ds_min_rtn_u32 v0, v0, v4
 ; GFX1164_DPP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164_DPP-NEXT:    buffer_gl0_inv


        


More information about the llvm-commits mailing list