[llvm] db5bcb2 - GlobalISel: Fix combine duplicating atomic loads (#111730)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 31 07:55:15 PDT 2024


Author: Matt Arsenault
Date: 2024-10-31T07:55:12-07:00
New Revision: db5bcb24c20e9ad5558ba7bb4f90c3a6000665f1

URL: https://github.com/llvm/llvm-project/commit/db5bcb24c20e9ad5558ba7bb4f90c3a6000665f1
DIFF: https://github.com/llvm/llvm-project/commit/db5bcb24c20e9ad5558ba7bb4f90c3a6000665f1.diff

LOG: GlobalISel: Fix combine duplicating atomic loads (#111730)

The sext_inreg (load) combine was not deleting the old load instruction,
and it would never be deleted if volatile or atomic.

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b7ddf9f479ef8e..7c1bda2163b7a0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1110,6 +1110,9 @@ void CombinerHelper::applySextInRegOfLoad(
   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                          LoadDef->getPointerReg(), *NewMMO);
   MI.eraseFromParent();
+
+  // Not all loads can be deleted, so make sure the old one is removed.
+  LoadDef->eraseFromParent();
 }
 
 /// Return true if 'MI' is a load or a store that may be fold it's address

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
index 817d1af9c226c8..83912b1e77db20 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
@@ -27,32 +27,12 @@ define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr %ptr) {
 }
 
 define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v2
-; GFX7-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v2
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT:    flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr %ptr monotonic, align 1
   %ext = sext i8 %load to i32
   ret i32 %ext
@@ -71,32 +51,12 @@ define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr %ptr) {
 }
 
 define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v2
-; GFX7-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v2
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT:    flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr %ptr monotonic, align 1
   %ext = sext i8 %load to i16
   ret i16 %ext
@@ -126,32 +86,12 @@ define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr %ptr) {
 }
 
 define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
-; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    flat_load_sshort v2, v[0:1] glc
-; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
-; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v2
-; GFX7-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
-; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    flat_load_sshort v2, v[0:1] glc
-; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
-; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v2
-; GFX8-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    flat_load_sshort v2, v[0:1] glc
-; GFX9-NEXT:    flat_load_ushort v3, v[0:1] glc
-; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    flat_load_sshort v0, v[0:1] glc
+; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i16, ptr %ptr monotonic, align 2
   %ext = sext i16 %load to i32
   ret i32 %ext

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
index a3116dd2656649..e2906c3d4fdb24 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
@@ -82,37 +82,28 @@ define i32 @atomic_load_global_monotonic_i8_sext_to_i32(ptr addrspace(1) %ptr) {
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
 ; GFX6-NEXT:    s_mov_b64 s[4:5], 0
-; GFX6-NEXT:    buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
-; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
+; GFX6-NEXT:    buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; GFX7-NEXT:    flat_load_sbyte v0, v[0:1] glc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; GFX8-NEXT:    flat_load_sbyte v0, v[0:1] glc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    global_load_sbyte v2, v[0:1], off glc
-; GFX9-NEXT:    global_load_ubyte v3, v[0:1], off glc
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9-NEXT:    global_load_sbyte v0, v[0:1], off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
@@ -163,37 +154,28 @@ define i16 @atomic_load_global_monotonic_i8_sext_to_i16(ptr addrspace(1) %ptr) {
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
 ; GFX6-NEXT:    s_mov_b64 s[4:5], 0
-; GFX6-NEXT:    buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
-; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
+; GFX6-NEXT:    buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; GFX7-NEXT:    flat_load_sbyte v0, v[0:1] glc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT:    flat_load_ubyte v0, v[0:1] glc
+; GFX8-NEXT:    flat_load_sbyte v0, v[0:1] glc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    global_load_sbyte v2, v[0:1], off glc
-; GFX9-NEXT:    global_load_ubyte v3, v[0:1], off glc
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9-NEXT:    global_load_sbyte v0, v[0:1], off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
@@ -279,37 +261,28 @@ define i32 @atomic_load_global_monotonic_i16_sext_to_i32(ptr addrspace(1) %ptr)
 ; GFX6-NEXT:    s_mov_b32 s6, 0
 ; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
 ; GFX6-NEXT:    s_mov_b64 s[4:5], 0
-; GFX6-NEXT:    buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc
-; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
+; GFX6-NEXT:    buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
-; GFX6-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX7-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT:    flat_load_sshort v2, v[0:1] glc
-; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
+; GFX7-NEXT:    flat_load_sshort v0, v[0:1] glc
 ; GFX7-NEXT:    s_waitcnt vmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT:    flat_load_sshort v2, v[0:1] glc
-; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
+; GFX8-NEXT:    flat_load_sshort v0, v[0:1] glc
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    global_load_sshort v2, v[0:1], off glc
-; GFX9-NEXT:    global_load_ushort v3, v[0:1], off glc
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v2
+; GFX9-NEXT:    global_load_sshort v0, v[0:1], off glc
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll
index fb001e09a967a7..1656814d6fb06b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll
@@ -65,29 +65,23 @@ define i32 @atomic_load_local_monotonic_i8_sext_to_i32(ptr addrspace(3) %ptr) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    s_mov_b32 m0, -1
-; GFX7-NEXT:    ds_read_i8 v1, v0
-; GFX7-NEXT:    ds_read_u8 v0, v0
+; GFX7-NEXT:    ds_read_i8 v0, v0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    s_mov_b32 m0, -1
-; GFX8-NEXT:    ds_read_i8 v1, v0
-; GFX8-NEXT:    ds_read_u8 v0, v0
+; GFX8-NEXT:    ds_read_i8 v0, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_i8 v1, v0
-; GFX9-NEXT:    ds_read_u8 v0, v0
+; GFX9-NEXT:    ds_read_i8 v0, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
   %ext = sext i8 %load to i32
@@ -127,29 +121,23 @@ define i16 @atomic_load_local_monotonic_i8_sext_to_i16(ptr addrspace(3) %ptr) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    s_mov_b32 m0, -1
-; GFX7-NEXT:    ds_read_i8 v1, v0
-; GFX7-NEXT:    ds_read_u8 v0, v0
+; GFX7-NEXT:    ds_read_i8 v0, v0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i16:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    s_mov_b32 m0, -1
-; GFX8-NEXT:    ds_read_i8 v1, v0
-; GFX8-NEXT:    ds_read_u8 v0, v0
+; GFX8-NEXT:    ds_read_i8 v0, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_i8 v1, v0
-; GFX9-NEXT:    ds_read_u8 v0, v0
+; GFX9-NEXT:    ds_read_i8 v0, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
   %ext = sext i8 %load to i16
@@ -216,29 +204,23 @@ define i32 @atomic_load_local_monotonic_i16_sext_to_i32(ptr addrspace(3) %ptr) {
 ; GFX7:       ; %bb.0:
 ; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX7-NEXT:    s_mov_b32 m0, -1
-; GFX7-NEXT:    ds_read_i16 v1, v0
-; GFX7-NEXT:    ds_read_u16 v0, v0
+; GFX7-NEXT:    ds_read_i16 v0, v0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX7-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX7-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: atomic_load_local_monotonic_i16_sext_to_i32:
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    s_mov_b32 m0, -1
-; GFX8-NEXT:    ds_read_i16 v1, v0
-; GFX8-NEXT:    ds_read_u16 v0, v0
+; GFX8-NEXT:    ds_read_i16 v0, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: atomic_load_local_monotonic_i16_sext_to_i32:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    ds_read_i16 v1, v0
-; GFX9-NEXT:    ds_read_u16 v0, v0
+; GFX9-NEXT:    ds_read_i16 v0, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
   %load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
   %ext = sext i16 %load to i32

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
index afa81980ebd621..23b80528c80a98 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
@@ -133,7 +133,6 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
     ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1)
@@ -172,7 +171,6 @@ body: |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
     ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1)


        


More information about the llvm-commits mailing list