[llvm-branch-commits] [llvm] GlobalISel: Fix combine duplicating atomic loads (PR #111730)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Oct 10 03:45:02 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/111730
>From 90a347159e1e6494bf95bcaf87e897e553ecc0f5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 9 Oct 2024 22:05:48 +0400
Subject: [PATCH 1/2] GlobalISel: Fix combine duplicating atomic loads
The sext_inreg (load) combine was not deleting the old load instruction,
and that leftover load would never be deleted later if it was volatile or atomic.
---
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1 +
.../AMDGPU/GlobalISel/atomic_load_flat.ll | 96 ++++---------------
.../AMDGPU/GlobalISel/atomic_load_global.ll | 39 ++------
.../AMDGPU/GlobalISel/atomic_load_local_2.ll | 36 ++-----
...lizer-combiner-sextload-from-sextinreg.mir | 2 -
5 files changed, 37 insertions(+), 137 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 14e94d48bf8362..535c827f6a8223 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1110,6 +1110,7 @@ void CombinerHelper::applySextInRegOfLoad(
Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
LoadDef->getPointerReg(), *NewMMO);
MI.eraseFromParent();
+ LoadDef->eraseFromParent();
}
/// Return true if 'MI' is a load or a store that may be fold it's address
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
index 788fb04e842b4e..fc3bc09cf8e3e1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll
@@ -27,32 +27,12 @@ define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr %ptr) {
}
define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT: flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i8, ptr %ptr monotonic, align 1
%ext = sext i8 %load to i32
ret i32 %ext
@@ -71,32 +51,12 @@ define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr %ptr) {
}
define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT: flat_load_ubyte v3, v[0:1] glc
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i8, ptr %ptr monotonic, align 1
%ext = sext i8 %load to i16
ret i16 %ext
@@ -126,32 +86,12 @@ define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr %ptr) {
}
define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) {
-; GFX7-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
-; GFX7: ; %bb.0:
-; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
-; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v2
-; GFX7-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX8-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
-; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v2
-; GFX8-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX9-NEXT: flat_load_ushort v3, v[0:1] glc
-; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, v2
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GCN-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i16, ptr %ptr monotonic, align 2
%ext = sext i16 %load to i32
ret i32 %ext
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
index 139d841590f85a..7a5e83868fd4b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll
@@ -58,28 +58,21 @@ define i32 @atomic_load_global_monotonic_i8_sext_to_i32(ptr addrspace(1) %ptr) {
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
+; GFX7-NEXT: flat_load_sbyte v0, v[0:1] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
+; GFX8-NEXT: flat_load_sbyte v0, v[0:1] glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_sbyte v2, v[0:1], off glc
-; GFX9-NEXT: global_load_ubyte v3, v[0:1], off glc
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
@@ -117,28 +110,21 @@ define i16 @atomic_load_global_monotonic_i8_sext_to_i16(ptr addrspace(1) %ptr) {
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc
+; GFX7-NEXT: flat_load_sbyte v0, v[0:1] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc
+; GFX8-NEXT: flat_load_sbyte v0, v[0:1] glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_sbyte v2, v[0:1], off glc
-; GFX9-NEXT: global_load_ubyte v3, v[0:1], off glc
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
@@ -201,28 +187,21 @@ define i32 @atomic_load_global_monotonic_i16_sext_to_i32(ptr addrspace(1) %ptr)
; GFX7-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc
+; GFX7-NEXT: flat_load_sbyte v0, v[0:1] glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc
-; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc
+; GFX8-NEXT: flat_load_sbyte v0, v[0:1] glc
; GFX8-NEXT: s_waitcnt vmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: global_load_sbyte v2, v[0:1], off glc
-; GFX9-NEXT: global_load_ushort v3, v[0:1], off glc
-; GFX9-NEXT: s_waitcnt vmcnt(1)
-; GFX9-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll
index 5823bc3dfd3f5b..bad6f3643462c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll
@@ -65,29 +65,23 @@ define i32 @atomic_load_local_monotonic_i8_sext_to_i32(ptr addrspace(3) %ptr) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
-; GFX7-NEXT: ds_read_i8 v1, v0
-; GFX7-NEXT: ds_read_u8 v0, v0
+; GFX7-NEXT: ds_read_i8 v0, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
-; GFX8-NEXT: ds_read_i8 v1, v0
-; GFX8-NEXT: ds_read_u8 v0, v0
+; GFX8-NEXT: ds_read_i8 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ds_read_i8 v1, v0
-; GFX9-NEXT: ds_read_u8 v0, v0
+; GFX9-NEXT: ds_read_i8 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
%ext = sext i8 %load to i32
@@ -127,29 +121,23 @@ define i16 @atomic_load_local_monotonic_i8_sext_to_i16(ptr addrspace(3) %ptr) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
-; GFX7-NEXT: ds_read_i8 v1, v0
-; GFX7-NEXT: ds_read_u8 v0, v0
+; GFX7-NEXT: ds_read_i8 v0, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
-; GFX8-NEXT: ds_read_i8 v1, v0
-; GFX8-NEXT: ds_read_u8 v0, v0
+; GFX8-NEXT: ds_read_i8 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ds_read_i8 v1, v0
-; GFX9-NEXT: ds_read_u8 v0, v0
+; GFX9-NEXT: ds_read_i8 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
%ext = sext i8 %load to i16
@@ -216,29 +204,23 @@ define i32 @atomic_load_local_monotonic_i16_sext_to_i32(ptr addrspace(3) %ptr) {
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
-; GFX7-NEXT: ds_read_i16 v1, v0
-; GFX7-NEXT: ds_read_u16 v0, v0
+; GFX7-NEXT: ds_read_i16 v0, v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
-; GFX7-NEXT: v_mov_b32_e32 v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_local_monotonic_i16_sext_to_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
-; GFX8-NEXT: ds_read_i16 v1, v0
-; GFX8-NEXT: ds_read_u16 v0, v0
+; GFX8-NEXT: ds_read_i16 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8-NEXT: v_mov_b32_e32 v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_local_monotonic_i16_sext_to_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ds_read_i8 v1, v0
-; GFX9-NEXT: ds_read_u16 v0, v0
+; GFX9-NEXT: ds_read_i8 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
%load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
%ext = sext i16 %load to i32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
index afa81980ebd621..23b80528c80a98 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir
@@ -133,7 +133,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1)
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1)
@@ -172,7 +171,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
- ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1)
; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1)
>From 3ab6aae0569908b9c7410d436173ec6f9c99936b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 10 Oct 2024 14:44:13 +0400
Subject: [PATCH 2/2] Add comment
---
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 535c827f6a8223..db09be9d6973d7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1110,6 +1110,8 @@ void CombinerHelper::applySextInRegOfLoad(
Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
LoadDef->getPointerReg(), *NewMMO);
MI.eraseFromParent();
+
+ // Not all loads can be deleted, so make sure the old one is removed.
LoadDef->eraseFromParent();
}
More information about the llvm-branch-commits
mailing list