[llvm] ed74583 - [AMDGPU] Don't check for VMEM hazards on GFX10
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 4 13:45:05 PST 2021
Author: Jay Foad
Date: 2021-03-04T21:44:56Z
New Revision: ed7458398a45ab7a976a2e5d52f6f2c58c5ba110
URL: https://github.com/llvm/llvm-project/commit/ed7458398a45ab7a976a2e5d52f6f2c58c5ba110
DIFF: https://github.com/llvm/llvm-project/commit/ed7458398a45ab7a976a2e5d52f6f2c58c5ba110.diff
LOG: [AMDGPU] Don't check for VMEM hazards on GFX10
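The hazard where a VMEM instruction reads an SGPR written by a VALU
instruction counts as a data dependency hazard, so no nops are required on
GFX10. The VMEM hazard checks are therefore moved after the
hasNoDataDepHazard() early return. Tested with Vulkan CTS on GFX10.1 and
GFX10.3.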
Differential Revision: https://reviews.llvm.org/D97926
Added:
Modified:
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
llvm/test/CodeGen/AMDGPU/global-saddr-store.ll
llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 0482865bd518..f5db222fc05b 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -157,12 +157,6 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (SIInstrInfo::isSMRD(*MI) && checkSMRDHazards(MI) > 0)
return HazardType;
- // FIXME: Should flat be considered vmem?
- if ((SIInstrInfo::isVMEM(*MI) ||
- SIInstrInfo::isFLAT(*MI))
- && checkVMEMHazards(MI) > 0)
- return HazardType;
-
if (ST.hasNSAtoVMEMBug() && checkNSAtoVMEMHazard(MI) > 0)
return HazardType;
@@ -172,6 +166,12 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (ST.hasNoDataDepHazard())
return NoHazard;
+ // FIXME: Should flat be considered vmem?
+ if ((SIInstrInfo::isVMEM(*MI) ||
+ SIInstrInfo::isFLAT(*MI))
+ && checkVMEMHazards(MI) > 0)
+ return HazardType;
+
if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
return HazardType;
@@ -275,9 +275,6 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (SIInstrInfo::isSMRD(*MI))
return std::max(WaitStates, checkSMRDHazards(MI));
- if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
- WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
-
if (ST.hasNSAtoVMEMBug())
WaitStates = std::max(WaitStates, checkNSAtoVMEMHazard(MI));
@@ -286,6 +283,9 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (ST.hasNoDataDepHazard())
return WaitStates;
+ if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
+ WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+
if (SIInstrInfo::isVALU(*MI))
WaitStates = std::max(WaitStates, checkVALUHazards(MI));
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index f363b4ba0f86..bc95c364f891 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -85,9 +85,8 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
; GCN-NEXT: v_readfirstlane_b32 s6, v16
; GCN-NEXT: v_readfirstlane_b32 s7, v17
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[14:15]
-; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
-; GCN-NEXT: s_nop 2
; GCN-NEXT: image_bvh_intersect_ray v[18:21], [v0, v1, v2, v3, v4, v6, v7, v8, v10, v11, v12], s[4:7]
+; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[16:17]
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
; GCN-NEXT: s_and_saveexec_b32 s0, s0
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
@@ -125,9 +124,8 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
; GCN-NEXT: v_readfirstlane_b32 s6, v12
; GCN-NEXT: v_readfirstlane_b32 s7, v13
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11]
-; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
-; GCN-NEXT: s_nop 2
; GCN-NEXT: image_bvh_intersect_ray v[5:8], [v0, v1, v2, v3, v4, v9, v14, v15], s[4:7] a16
+; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
; GCN-NEXT: s_and_saveexec_b32 s0, s0
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
@@ -155,9 +153,8 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
; GCN-NEXT: v_readfirstlane_b32 s6, v17
; GCN-NEXT: v_readfirstlane_b32 s7, v18
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[15:16]
-; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
-; GCN-NEXT: s_nop 2
; GCN-NEXT: image_bvh64_intersect_ray v[19:22], [v0, v1, v2, v3, v4, v5, v7, v8, v9, v11, v12, v13], s[4:7]
+; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[17:18]
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
; GCN-NEXT: s_and_saveexec_b32 s0, s0
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
@@ -195,9 +192,8 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
; GCN-NEXT: v_readfirstlane_b32 s6, v13
; GCN-NEXT: v_readfirstlane_b32 s7, v14
; GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12]
-; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
-; GCN-NEXT: s_nop 2
; GCN-NEXT: image_bvh64_intersect_ray v[6:9], [v0, v1, v2, v3, v4, v5, v10, v15, v16], s[4:7] a16
+; GCN-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
; GCN-NEXT: s_and_b32 s0, s0, vcc_lo
; GCN-NEXT: s_and_saveexec_b32 s0, s0
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s0
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 0dd84a9c5eb8..1753cdcf407b 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -131,7 +131,6 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -162,7 +161,6 @@ define amdgpu_kernel void @add_i32_constant(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_mad_u32_u24 v0, v0, 5, s2
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -518,7 +516,6 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -569,7 +566,6 @@ define amdgpu_kernel void @add_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -754,7 +750,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -805,7 +800,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1032(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -990,7 +984,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -1041,7 +1034,6 @@ define amdgpu_kernel void @add_i32_varying_gfx1064(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_add_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -1186,7 +1178,6 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_mov_b32 s2, -1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 1
; GFX1064-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -1219,7 +1210,6 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_mov_b32 s2, -1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 1
; GFX1032-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -2040,7 +2030,6 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_sub_nc_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -2091,7 +2080,6 @@ define amdgpu_kernel void @sub_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_sub_nc_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -2762,7 +2750,6 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_and_b32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -2813,7 +2800,6 @@ define amdgpu_kernel void @and_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_and_b32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -2998,7 +2984,6 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_or_b32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -3049,7 +3034,6 @@ define amdgpu_kernel void @or_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_or_b32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -3234,7 +3218,6 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_xor_b32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -3285,7 +3268,6 @@ define amdgpu_kernel void @xor_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_xor_b32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -3472,7 +3454,6 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_max_i32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -3525,7 +3506,6 @@ define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_max_i32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -3891,7 +3871,6 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_min_i32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -3944,7 +3923,6 @@ define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_min_i32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -4308,7 +4286,6 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_max_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -4359,7 +4336,6 @@ define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_max_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
@@ -4720,7 +4696,6 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX1064-NEXT: v_min_u32_e32 v0, s3, v0
; GFX1064-NEXT: s_mov_b32 s3, 0x31016000
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: s_nop 0
; GFX1064-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1064-NEXT: s_endpgm
;
@@ -4771,7 +4746,6 @@ define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
; GFX1032-NEXT: v_min_u32_e32 v0, s3, v0
; GFX1032-NEXT: s_mov_b32 s3, 0x31016000
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: s_nop 0
; GFX1032-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX1032-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
index b511d5917945..af3a6a868981 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
@@ -198,7 +198,6 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn(i32 %voffset,
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_gl0_inv
@@ -238,7 +237,6 @@ define amdgpu_ps float @global_xchg_saddr_uniform_ptr_in_vgprs_rtn_immoffset(i32
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v0, v1, s[0:1] offset:42 glc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: buffer_gl0_inv
@@ -279,7 +277,6 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn(i32 %voffset
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1]
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_gl0_inv
@@ -318,7 +315,6 @@ define amdgpu_ps void @global_xchg_saddr_uniform_ptr_in_vgprs_nortn_immoffset(i3
; GFX10-NEXT: v_readfirstlane_b32 s1, v3
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_nop 2
; GFX10-NEXT: global_atomic_swap v0, v1, s[0:1] offset:42
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: buffer_gl0_inv
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
index f28b9fdcdccc..1e58d6dbba05 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
@@ -788,17 +788,28 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_ptrtoint_commute_add_imm_
; Base pointer is uniform, but also in VGPRs
define amdgpu_ps float @global_load_saddr_uniform_ptr_in_vgprs(i32 %voffset) {
-; GCN-LABEL: global_load_saddr_uniform_ptr_in_vgprs:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: ds_read_b64 v[1:2], v1
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s0, v1
-; GCN-NEXT: v_readfirstlane_b32 s1, v2
-; GCN-NEXT: s_nop 4
-; GCN-NEXT: global_load_ubyte v0, v0, s[0:1]
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: ; return to shader part epilog
+; GFX9-LABEL: global_load_saddr_uniform_ptr_in_vgprs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: ds_read_b64 v[1:2], v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
+; GFX9-NEXT: v_readfirstlane_b32 s1, v2
+; GFX9-NEXT: s_nop 4
+; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: global_load_saddr_uniform_ptr_in_vgprs:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: ds_read_b64 v[1:2], v1
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_readfirstlane_b32 s0, v1
+; GFX10-NEXT: v_readfirstlane_b32 s1, v2
+; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
@@ -810,17 +821,28 @@ define amdgpu_ps float @global_load_saddr_uniform_ptr_in_vgprs(i32 %voffset) {
; Base pointer is uniform, but also in VGPRs, with imm offset
define amdgpu_ps float @global_load_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset) {
-; GCN-LABEL: global_load_saddr_uniform_ptr_in_vgprs_immoffset:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: ds_read_b64 v[1:2], v1
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s0, v1
-; GCN-NEXT: v_readfirstlane_b32 s1, v2
-; GCN-NEXT: s_nop 4
-; GCN-NEXT: global_load_ubyte v0, v0, s[0:1] offset:42
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: ; return to shader part epilog
+; GFX9-LABEL: global_load_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-NEXT: ds_read_b64 v[1:2], v1
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
+; GFX9-NEXT: v_readfirstlane_b32 s1, v2
+; GFX9-NEXT: s_nop 4
+; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:42
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: global_load_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: ds_read_b64 v[1:2], v1
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_readfirstlane_b32 s0, v1
+; GFX10-NEXT: v_readfirstlane_b32 s1, v2
+; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:42
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: ; return to shader part epilog
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll
index c50efde84520..2b3e6a4e9c31 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-store.ll
@@ -58,16 +58,26 @@ define amdgpu_ps void @global_store_saddr_i8_zext_vgpr_offset_neg2048(i8 addrspa
; Base pointer is uniform, but also in VGPRs
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8 %data) {
-; GCN-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: ds_read_b64 v[2:3], v2
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s0, v2
-; GCN-NEXT: v_readfirstlane_b32 s1, v3
-; GCN-NEXT: s_nop 4
-; GCN-NEXT: global_store_byte v0, v1, s[0:1]
-; GCN-NEXT: s_endpgm
+; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: ds_read_b64 v[2:3], v2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_readfirstlane_b32 s0, v2
+; GFX9-NEXT: v_readfirstlane_b32 s1, v3
+; GFX9-NEXT: s_nop 4
+; GFX9-NEXT: global_store_byte v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: ds_read_b64 v[2:3], v2
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_readfirstlane_b32 s0, v2
+; GFX10-NEXT: v_readfirstlane_b32 s1, v3
+; GFX10-NEXT: global_store_byte v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
@@ -77,16 +87,26 @@ define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset, i8
; Base pointer is uniform, but also in VGPRs, with imm offset
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs_immoffset(i32 %voffset, i8 %data) {
-; GCN-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
-; GCN: ; %bb.0:
-; GCN-NEXT: v_mov_b32_e32 v2, 0
-; GCN-NEXT: ds_read_b64 v[2:3], v2
-; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_readfirstlane_b32 s0, v2
-; GCN-NEXT: v_readfirstlane_b32 s1, v3
-; GCN-NEXT: s_nop 4
-; GCN-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
-; GCN-NEXT: s_endpgm
+; GFX9-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: ds_read_b64 v[2:3], v2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_readfirstlane_b32 s0, v2
+; GFX9-NEXT: v_readfirstlane_b32 s1, v3
+; GFX9-NEXT: s_nop 4
+; GFX9-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: global_store_saddr_uniform_ptr_in_vgprs_immoffset:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: ds_read_b64 v[2:3], v2
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_readfirstlane_b32 s0, v2
+; GFX10-NEXT: v_readfirstlane_b32 s1, v3
+; GFX10-NEXT: global_store_byte v0, v1, s[0:1] offset:-120
+; GFX10-NEXT: s_endpgm
%sbase = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(3)* @ptr.in.lds
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, i8 addrspace(1)* %sbase, i64 %zext.offset
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
index e36f4bd63387..496416fd9864 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-hidden-bundle.mir
@@ -40,7 +40,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_hazard_ignore_bundle_instr
# GCN: S_LOAD_DWORDX2_IMM
# GCN-NEXT: }
-# GCN-NEXT: S_NOP 3
+# GFX9-NEXT: S_NOP 3
# GCN: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_hazard_ignore_bundle_instr
@@ -60,7 +60,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_min_of_two_after_bundle
# GCN: bb.2:
-# GCN-NEXT: S_NOP 4
+# GFX9-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_min_of_two_after_bundle
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
index 48f26c1e0b9b..7f99b4af154a 100644
--- a/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazard-in-bundle.mir
@@ -67,7 +67,7 @@ body: |
# GCN-LABEL: name: vmem_vcc_hazard_in_bundle
# GCN: S_LOAD_DWORDX2_IMM
-# GCN-NEXT: S_NOP 3
+# GFX9-NEXT: S_NOP 3
# GCN: BUFFER_LOAD_DWORD_OFFEN
---
name: vmem_vcc_hazard_in_bundle
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 4dfc9bce69aa..787877916783 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -16,7 +16,6 @@
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: s_nop 0
; W64: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W64: s_xor_b64 exec, exec, [[AND]]
; W64: s_cbranch_execnz [[LOOPBB]]
@@ -34,7 +33,6 @@
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: s_nop 0
; W32: buffer_load_format_x [[RES:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB]]
@@ -59,7 +57,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: s_nop 0
; W64: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB0]]
@@ -77,7 +74,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: s_nop 0
; W64: buffer_load_format_x [[RES1:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB1]]
@@ -99,7 +95,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: s_nop 0
; W32: buffer_load_format_x [[RES0:v[0-9]+]], v{{[0-9]+}}, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB0]]
@@ -117,7 +112,6 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 {
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: s_nop 0
; W32: buffer_load_format_x [[RES1:v[0-9]+]], v8, s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB1]]
@@ -150,7 +144,6 @@ entry:
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: s_nop 0
; W64: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB0]]
@@ -171,7 +164,6 @@ entry:
; W64: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W64: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], vcc, [[CMP0]]
; W64: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
-; W64: s_nop 0
; W64: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W64: s_xor_b64 exec, exec, [[SAVE]]
; W64: s_cbranch_execnz [[LOOPBB1]]
@@ -196,7 +188,6 @@ entry:
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: s_nop 0
; W32: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB0]]
@@ -217,7 +208,6 @@ entry:
; W32: v_cmp_eq_u64_e64 [[CMP0:s[0-9]+]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}}
; W32: s_and_b32 [[AND:s[0-9]+]], vcc_lo, [[CMP0]]
; W32: s_and_saveexec_b32 [[SAVE:s[0-9]+]], [[AND]]
-; W32: s_nop 0
; W32: buffer_load_format_x [[RES]], [[IDX]], s{{\[}}[[SRSRC0]]:[[SRSRC3]]{{\]}}, 0 idxen
; W32: s_xor_b32 exec_lo, exec_lo, [[SAVE]]
; W32: s_cbranch_execnz [[LOOPBB1]]
diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index c743281440c5..c48b3e185e19 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -67,7 +67,6 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
; FLATSCR-NEXT: s_cbranch_execz BB0_2
; FLATSCR-NEXT: ; %bb.1: ; %if.then4.i
; FLATSCR-NEXT: s_movk_i32 vcc_lo, 0x4000
-; FLATSCR-NEXT: s_nop 1
; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: v_add_nc_u32_e32 v0, v1, v0
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
index 3a18f4680466..ede567e58b69 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -26,7 +26,6 @@ define void @vgpr_descriptor_waterfall_loop_idom_update(<4 x i32>* %arg) #0 {
; GCN-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3]
; GCN-NEXT: s_and_b32 s4, vcc_lo, s4
; GCN-NEXT: s_and_saveexec_b32 s4, s4
-; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_store_dword v0, v0, s[8:11], 0 offen
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s4
More information about the llvm-commits mailing list