[llvm] AMDGPU: Avoid creating unnecessary block split in atomic expansion (PR #102440)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 8 02:07:31 PDT 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102440
This was creating a new block to insert the is.shared check, but we
can just do that in the original block.
>From 002d8999f6e4520452605082b61059b6ff276c0a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 8 Aug 2024 01:29:14 +0400
Subject: [PATCH] AMDGPU: Avoid creating unnecessary block split in atomic
expansion
This was creating a new block to insert the is.shared check, but we
can just do that in the original block.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 ---
llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 4 +-
.../CodeGen/AMDGPU/flat-atomicrmw-fadd.ll | 44 +++++++++----------
.../AtomicExpand/AMDGPU/expand-atomic-mmra.ll | 2 -
...and-atomic-rmw-fadd-flat-specialization.ll | 24 ++++------
.../AMDGPU/expand-atomic-rmw-fadd.ll | 6 +--
6 files changed, 34 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4e9c271197613b..e982f1e6432894 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16595,8 +16595,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
Function *F = BB->getParent();
BasicBlock *ExitBB =
BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
- BasicBlock *CheckSharedBB =
- BasicBlock::Create(Ctx, "atomicrmw.check.shared", F, ExitBB);
BasicBlock *SharedBB = BasicBlock::Create(Ctx, "atomicrmw.shared", F, ExitBB);
BasicBlock *CheckPrivateBB =
BasicBlock::Create(Ctx, "atomicrmw.check.private", F, ExitBB);
@@ -16623,9 +16621,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- Builder.CreateBr(CheckSharedBB);
-
- Builder.SetInsertPoint(CheckSharedBB);
CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {},
{Addr}, nullptr, "is.shared");
Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB);
diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
index 8bf7a1cc42f642..4f0bc512565d13 100644
--- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll
@@ -222,7 +222,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 {
define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX908-LABEL: syncscope_workgroup_nortn:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
@@ -272,7 +272,7 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX908-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: syncscope_workgroup_nortn:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
index 896b85ea14da11..422c8a0be23b49 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll
@@ -630,7 +630,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
@@ -682,7 +682,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__am
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
@@ -839,7 +839,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -893,7 +893,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -1062,7 +1062,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
@@ -1116,7 +1116,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
@@ -1469,7 +1469,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -1525,7 +1525,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -2006,7 +2006,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -2060,7 +2060,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -2950,7 +2950,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
@@ -3002,7 +3002,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memor
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
@@ -3159,7 +3159,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -3213,7 +3213,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -3382,7 +3382,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
@@ -3436,7 +3436,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
@@ -3789,7 +3789,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -3845,7 +3845,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -4198,7 +4198,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX90A-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -4254,7 +4254,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fc, v0
; GFX908-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
@@ -5239,7 +5239,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX90A: ; %bb.0: ; %atomicrmw.check.shared
+; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX90A-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
@@ -5291,7 +5291,7 @@ define void @flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amd
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory_amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
-; GFX908: ; %bb.0: ; %atomicrmw.check.shared
+; GFX908: ; %bb.0:
; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT: s_mov_b64 s[4:5], src_shared_base
; GFX908-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
index d51e9291a6119c..78969839efcb8a 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-mmra.ll
@@ -127,8 +127,6 @@ define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in,
define void @syncscope_workgroup_nortn(ptr %addr, float %val) #0 {
; GFX90A-LABEL: define void @syncscope_workgroup_nortn(
; GFX90A-SAME: ptr [[ADDR:%.*]], float [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
-; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
-; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
index 70dc5b267f73b9..96cbf057b490a8 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll
@@ -22,8 +22,6 @@ define float @syncscope_system(ptr %addr, float %val) {
; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @syncscope_system(
-; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
-; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
@@ -36,8 +34,8 @@ define float @syncscope_system(ptr %addr, float %val) {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
-; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
-; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
+; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
+; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
@@ -94,8 +92,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) {
; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @syncscope_workgroup_rtn(
-; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
-; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
@@ -108,8 +104,8 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
-; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
-; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
+; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
+; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
@@ -150,8 +146,6 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) {
define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908-LABEL: @syncscope_workgroup_nortn(
-; GFX908-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
-; GFX908: atomicrmw.check.shared:
; GFX908-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX908-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX908: atomicrmw.shared:
@@ -164,8 +158,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908: atomicrmw.private:
; GFX908-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX908-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
-; GFX908-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
-; GFX908-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
+; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
+; GFX908-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX908-NEXT: br label [[ATOMICRMW_PHI]]
; GFX908: atomicrmw.global:
; GFX908-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
@@ -178,8 +172,6 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX908-NEXT: ret void
;
; GFX90A-LABEL: @syncscope_workgroup_nortn(
-; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
-; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
@@ -192,8 +184,8 @@ define void @syncscope_workgroup_nortn(ptr %addr, float %val) {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
-; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
-; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
+; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
+; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll
index def9522077004f..7eaaf2ae1ec997 100644
--- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll
+++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd.ll
@@ -595,8 +595,6 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 {
; GFX908-NEXT: ret float [[TMP5]]
;
; GFX90A-LABEL: @test_atomicrmw_fadd_f32_flat_unsafe(
-; GFX90A-NEXT: br label [[ATOMICRMW_CHECK_SHARED:%.*]]
-; GFX90A: atomicrmw.check.shared:
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR:%.*]])
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
; GFX90A: atomicrmw.shared:
@@ -609,8 +607,8 @@ define float @test_atomicrmw_fadd_f32_flat_unsafe(ptr %ptr, float %value) #0 {
; GFX90A: atomicrmw.private:
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 4
-; GFX90A-NEXT: [[VAL_NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]]
-; GFX90A-NEXT: store float [[VAL_NEW]], ptr addrspace(5) [[TMP3]], align 4
+; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VALUE]]
+; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 4
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
; GFX90A: atomicrmw.global:
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
More information about the llvm-commits
mailing list