[llvm] AtomicExpand: Stop precollecting atomic instructions in function (PR #102914)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 08:09:54 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102914
From 6a4771cffd04bcf649270f18421d11913a596f7f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 12 Aug 2024 18:16:21 +0400
Subject: [PATCH] AtomicExpand: Stop precollecting atomic instructions in
function
Move the processing of an instruction into a helper function. Also
avoid redundant checking for all types of atomic instructions.
Including the assert, the old code was effectively performing the same
check three times.
---
llvm/lib/CodeGen/AtomicExpandPass.cpp | 269 ++++++++++++-----------
llvm/test/CodeGen/NVPTX/atomics-sm70.ll | 16 +-
llvm/test/CodeGen/NVPTX/atomics-sm90.ll | 16 +-
llvm/test/CodeGen/PowerPC/all-atomics.ll | 88 ++++----
llvm/test/CodeGen/X86/atomic6432.ll | 56 ++---
llvm/test/CodeGen/X86/pr5145.ll | 16 +-
llvm/test/CodeGen/X86/pr59305.ll | 4 +-
7 files changed, 239 insertions(+), 226 deletions(-)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 49836b914784fc..f6f6af715abd82 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -119,6 +119,8 @@ class AtomicExpandImpl {
llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg);
+ bool processAtomicInstr(Instruction *I);
+
public:
bool run(Function &F, const TargetMachine *TM);
};
@@ -203,149 +205,158 @@ static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
-bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
- const auto *Subtarget = TM->getSubtargetImpl(F);
- if (!Subtarget->enableAtomicExpand())
- return false;
- TLI = Subtarget->getTargetLowering();
- DL = &F.getDataLayout();
+bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
+ auto *LI = dyn_cast<LoadInst>(I);
+ auto *SI = dyn_cast<StoreInst>(I);
+ auto *RMWI = dyn_cast<AtomicRMWInst>(I);
+ auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
- SmallVector<Instruction *, 1> AtomicInsts;
+ // If the Size/Alignment is not supported, replace with a libcall.
+ if (LI) {
+ if (!LI->isAtomic())
+ return false;
- // Changing control-flow while iterating through it is a bad idea, so gather a
- // list of all atomic instructions before we start.
- for (Instruction &I : instructions(F))
- if (I.isAtomic() && !isa<FenceInst>(&I))
- AtomicInsts.push_back(&I);
+ if (!atomicSizeSupported(TLI, LI)) {
+ expandAtomicLoadToLibcall(LI);
+ return true;
+ }
+ } else if (SI) {
+ if (!SI->isAtomic())
+ return false;
- bool MadeChange = false;
- for (auto *I : AtomicInsts) {
- auto LI = dyn_cast<LoadInst>(I);
- auto SI = dyn_cast<StoreInst>(I);
- auto RMWI = dyn_cast<AtomicRMWInst>(I);
- auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
- assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
-
- // If the Size/Alignment is not supported, replace with a libcall.
- if (LI) {
- if (!atomicSizeSupported(TLI, LI)) {
- expandAtomicLoadToLibcall(LI);
- MadeChange = true;
- continue;
- }
- } else if (SI) {
- if (!atomicSizeSupported(TLI, SI)) {
- expandAtomicStoreToLibcall(SI);
- MadeChange = true;
- continue;
- }
- } else if (RMWI) {
- if (!atomicSizeSupported(TLI, RMWI)) {
- expandAtomicRMWToLibcall(RMWI);
- MadeChange = true;
- continue;
- }
- } else if (CASI) {
- if (!atomicSizeSupported(TLI, CASI)) {
- expandAtomicCASToLibcall(CASI);
- MadeChange = true;
- continue;
- }
+ if (!atomicSizeSupported(TLI, SI)) {
+ expandAtomicStoreToLibcall(SI);
+ return true;
+ }
+ } else if (RMWI) {
+ if (!atomicSizeSupported(TLI, RMWI)) {
+ expandAtomicRMWToLibcall(RMWI);
+ return true;
}
+ } else if (CASI) {
+ if (!atomicSizeSupported(TLI, CASI)) {
+ expandAtomicCASToLibcall(CASI);
+ return true;
+ }
+ } else
+ return false;
- if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
- TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
- I = LI = convertAtomicLoadToIntegerType(LI);
- MadeChange = true;
- } else if (SI &&
- TLI->shouldCastAtomicStoreInIR(SI) ==
- TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
- I = SI = convertAtomicStoreToIntegerType(SI);
+ bool MadeChange = false;
+
+ if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = LI = convertAtomicLoadToIntegerType(LI);
+ MadeChange = true;
+ } else if (SI && TLI->shouldCastAtomicStoreInIR(SI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = SI = convertAtomicStoreToIntegerType(SI);
+ MadeChange = true;
+ } else if (RMWI &&
+ TLI->shouldCastAtomicRMWIInIR(RMWI) ==
+ TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
+ I = RMWI = convertAtomicXchgToIntegerType(RMWI);
+ MadeChange = true;
+ } else if (CASI) {
+ // TODO: when we're ready to make the change at the IR level, we can
+ // extend convertCmpXchgToInteger for floating point too.
+ if (CASI->getCompareOperand()->getType()->isPointerTy()) {
+ // TODO: add a TLI hook to control this so that each target can
+ // convert to lowering the original type one at a time.
+ I = CASI = convertCmpXchgToIntegerType(CASI);
MadeChange = true;
- } else if (RMWI &&
- TLI->shouldCastAtomicRMWIInIR(RMWI) ==
- TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
- I = RMWI = convertAtomicXchgToIntegerType(RMWI);
+ }
+ }
+
+ if (TLI->shouldInsertFencesForAtomic(I)) {
+ auto FenceOrdering = AtomicOrdering::Monotonic;
+ if (LI && isAcquireOrStronger(LI->getOrdering())) {
+ FenceOrdering = LI->getOrdering();
+ LI->setOrdering(AtomicOrdering::Monotonic);
+ } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
+ FenceOrdering = SI->getOrdering();
+ SI->setOrdering(AtomicOrdering::Monotonic);
+ } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
+ isAcquireOrStronger(RMWI->getOrdering()))) {
+ FenceOrdering = RMWI->getOrdering();
+ RMWI->setOrdering(AtomicOrdering::Monotonic);
+ } else if (CASI &&
+ TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+ TargetLoweringBase::AtomicExpansionKind::None &&
+ (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getSuccessOrdering()) ||
+ isAcquireOrStronger(CASI->getFailureOrdering()))) {
+ // If a compare and swap is lowered to LL/SC, we can do smarter fence
+ // insertion, with a stronger one on the success path than on the
+ // failure path. As a result, fence insertion is directly done by
+ // expandAtomicCmpXchg in that case.
+ FenceOrdering = CASI->getMergedOrdering();
+ CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
+ CASI->setFailureOrdering(AtomicOrdering::Monotonic);
+ }
+
+ if (FenceOrdering != AtomicOrdering::Monotonic) {
+ MadeChange |= bracketInstWithFences(I, FenceOrdering);
+ }
+ } else if (I->hasAtomicStore() &&
+ TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
+ auto FenceOrdering = AtomicOrdering::Monotonic;
+ if (SI)
+ FenceOrdering = SI->getOrdering();
+ else if (RMWI)
+ FenceOrdering = RMWI->getOrdering();
+ else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
+ TargetLoweringBase::AtomicExpansionKind::LLSC)
+ // LLSC is handled in expandAtomicCmpXchg().
+ FenceOrdering = CASI->getSuccessOrdering();
+
+ IRBuilder Builder(I);
+ if (auto TrailingFence =
+ TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
+ TrailingFence->moveAfter(I);
MadeChange = true;
- } else if (CASI) {
- // TODO: when we're ready to make the change at the IR level, we can
- // extend convertCmpXchgToInteger for floating point too.
- if (CASI->getCompareOperand()->getType()->isPointerTy()) {
- // TODO: add a TLI hook to control this so that each target can
- // convert to lowering the original type one at a time.
- I = CASI = convertCmpXchgToIntegerType(CASI);
- MadeChange = true;
- }
}
+ }
- if (TLI->shouldInsertFencesForAtomic(I)) {
- auto FenceOrdering = AtomicOrdering::Monotonic;
- if (LI && isAcquireOrStronger(LI->getOrdering())) {
- FenceOrdering = LI->getOrdering();
- LI->setOrdering(AtomicOrdering::Monotonic);
- } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
- FenceOrdering = SI->getOrdering();
- SI->setOrdering(AtomicOrdering::Monotonic);
- } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
- isAcquireOrStronger(RMWI->getOrdering()))) {
- FenceOrdering = RMWI->getOrdering();
- RMWI->setOrdering(AtomicOrdering::Monotonic);
- } else if (CASI &&
- TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
- TargetLoweringBase::AtomicExpansionKind::None &&
- (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
- isAcquireOrStronger(CASI->getSuccessOrdering()) ||
- isAcquireOrStronger(CASI->getFailureOrdering()))) {
- // If a compare and swap is lowered to LL/SC, we can do smarter fence
- // insertion, with a stronger one on the success path than on the
- // failure path. As a result, fence insertion is directly done by
- // expandAtomicCmpXchg in that case.
- FenceOrdering = CASI->getMergedOrdering();
- CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
- CASI->setFailureOrdering(AtomicOrdering::Monotonic);
- }
+ if (LI)
+ MadeChange |= tryExpandAtomicLoad(LI);
+ else if (SI)
+ MadeChange |= tryExpandAtomicStore(SI);
+ else if (RMWI) {
+ // There are two different ways of expanding RMW instructions:
+ // - into a load if it is idempotent
+ // - into a Cmpxchg/LL-SC loop otherwise
+ // we try them in that order.
+
+ if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ MadeChange = true;
- if (FenceOrdering != AtomicOrdering::Monotonic) {
- MadeChange |= bracketInstWithFences(I, FenceOrdering);
- }
- } else if (I->hasAtomicStore() &&
- TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
- auto FenceOrdering = AtomicOrdering::Monotonic;
- if (SI)
- FenceOrdering = SI->getOrdering();
- else if (RMWI)
- FenceOrdering = RMWI->getOrdering();
- else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
- TargetLoweringBase::AtomicExpansionKind::LLSC)
- // LLSC is handled in expandAtomicCmpXchg().
- FenceOrdering = CASI->getSuccessOrdering();
-
- IRBuilder Builder(I);
- if (auto TrailingFence =
- TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
- TrailingFence->moveAfter(I);
- MadeChange = true;
- }
+ } else {
+ MadeChange |= tryExpandAtomicRMW(RMWI);
}
+ } else if (CASI)
+ MadeChange |= tryExpandAtomicCmpXchg(CASI);
- if (LI)
- MadeChange |= tryExpandAtomicLoad(LI);
- else if (SI)
- MadeChange |= tryExpandAtomicStore(SI);
- else if (RMWI) {
- // There are two different ways of expanding RMW instructions:
- // - into a load if it is idempotent
- // - into a Cmpxchg/LL-SC loop otherwise
- // we try them in that order.
-
- if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
+ return MadeChange;
+}
+
+bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
+ const auto *Subtarget = TM->getSubtargetImpl(F);
+ if (!Subtarget->enableAtomicExpand())
+ return false;
+ TLI = Subtarget->getTargetLowering();
+ DL = &F.getDataLayout();
+
+ bool MadeChange = false;
+
+ for (BasicBlock &BB : make_early_inc_range(F)) {
+ for (Instruction &I : make_early_inc_range(reverse(BB))) {
+ // We do this iteration backwards because the control flow introducing
+ // transforms split the block at the end.
+ if (processAtomicInstr(&I))
MadeChange = true;
- } else {
- MadeChange |= tryExpandAtomicRMW(RMWI);
- }
- } else if (CASI)
- MadeChange |= tryExpandAtomicCmpXchg(CASI);
+ }
}
+
return MadeChange;
}
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index 9cc45fbe313b7e..0c1ca8cb7ac166 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -61,7 +61,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: shl.b32 %r27, %r26, %r2;
; CHECKPTX62-NEXT: not.b32 %r3, %r27;
; CHECKPTX62-NEXT: ld.u32 %r54, [%r1];
-; CHECKPTX62-NEXT: $L__BB0_1: // %atomicrmw.start
+; CHECKPTX62-NEXT: $L__BB0_1: // %atomicrmw.start45
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r28, %r54, %r2;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs2, %r28;
@@ -74,9 +74,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54;
; CHECKPTX62-NEXT: mov.u32 %r54, %r6;
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
-; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end
+; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX62-NEXT: ld.u32 %r55, [%r1];
-; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start9
+; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start27
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r33, %r55, %r2;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs6, %r33;
@@ -90,14 +90,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55;
; CHECKPTX62-NEXT: mov.u32 %r55, %r9;
; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3;
-; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end8
+; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4;
; CHECKPTX62-NEXT: shl.b32 %r38, %r22, 3;
; CHECKPTX62-NEXT: and.b32 %r11, %r38, 24;
; CHECKPTX62-NEXT: shl.b32 %r40, %r26, %r11;
; CHECKPTX62-NEXT: not.b32 %r12, %r40;
; CHECKPTX62-NEXT: ld.global.u32 %r56, [%r10];
-; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start27
+; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start9
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r41, %r56, %r11;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs11, %r41;
@@ -110,14 +110,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56;
; CHECKPTX62-NEXT: mov.u32 %r56, %r15;
; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5;
-; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end26
+; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4;
; CHECKPTX62-NEXT: shl.b32 %r46, %r23, 3;
; CHECKPTX62-NEXT: and.b32 %r17, %r46, 24;
; CHECKPTX62-NEXT: shl.b32 %r48, %r26, %r17;
; CHECKPTX62-NEXT: not.b32 %r18, %r48;
; CHECKPTX62-NEXT: ld.shared.u32 %r57, [%r16];
-; CHECKPTX62-NEXT: $L__BB0_7: // %atomicrmw.start45
+; CHECKPTX62-NEXT: $L__BB0_7: // %atomicrmw.start
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r49, %r57, %r17;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs15, %r49;
@@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57;
; CHECKPTX62-NEXT: mov.u32 %r57, %r21;
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
-; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end44
+; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX62-NEXT: ret;
%r1 = atomicrmw fadd ptr %dp0, half %val seq_cst
%r2 = atomicrmw fadd ptr %dp0, half 1.0 seq_cst
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index 9301ea44c69367..22e5033f647a65 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -63,7 +63,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: not.b32 %r3, %r27;
; CHECKPTX71-NEXT: ld.u32 %r54, [%r1];
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs1;
-; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start
+; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start45
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: shr.u32 %r28, %r54, %r2;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs2, %r28;
@@ -78,9 +78,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54;
; CHECKPTX71-NEXT: mov.u32 %r54, %r6;
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
-; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end
+; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX71-NEXT: ld.u32 %r55, [%r1];
-; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start9
+; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start27
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: shr.u32 %r33, %r55, %r2;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs6, %r33;
@@ -95,14 +95,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55;
; CHECKPTX71-NEXT: mov.u32 %r55, %r9;
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
-; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end8
+; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4;
; CHECKPTX71-NEXT: shl.b32 %r38, %r22, 3;
; CHECKPTX71-NEXT: and.b32 %r11, %r38, 24;
; CHECKPTX71-NEXT: shl.b32 %r40, %r26, %r11;
; CHECKPTX71-NEXT: not.b32 %r12, %r40;
; CHECKPTX71-NEXT: ld.global.u32 %r56, [%r10];
-; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start27
+; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start9
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: shr.u32 %r41, %r56, %r11;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs10, %r41;
@@ -117,14 +117,14 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56;
; CHECKPTX71-NEXT: mov.u32 %r56, %r15;
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
-; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end26
+; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4;
; CHECKPTX71-NEXT: shl.b32 %r46, %r23, 3;
; CHECKPTX71-NEXT: and.b32 %r17, %r46, 24;
; CHECKPTX71-NEXT: shl.b32 %r48, %r26, %r17;
; CHECKPTX71-NEXT: not.b32 %r18, %r48;
; CHECKPTX71-NEXT: ld.shared.u32 %r57, [%r16];
-; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start45
+; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: shr.u32 %r49, %r57, %r17;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs14, %r49;
@@ -139,7 +139,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57;
; CHECKPTX71-NEXT: mov.u32 %r57, %r21;
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
-; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end44
+; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX71-NEXT: ret;
%r1 = atomicrmw fadd ptr %dp0, bfloat %val seq_cst
%r2 = atomicrmw fadd ptr %dp0, bfloat 1.0 seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 093253bf8f6915..531e559ea7309c 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -913,7 +913,7 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 6, 4(31)
; AIX32-NEXT: lwz 7, 0(31)
; AIX32-NEXT: .align 4
-; AIX32-NEXT: L..BB0_49: # %atomicrmw.start
+; AIX32-NEXT: L..BB0_49: # %atomicrmw.start2
; AIX32-NEXT: #
; AIX32-NEXT: xori 3, 5, 1
; AIX32-NEXT: stw 7, 72(1)
@@ -938,7 +938,7 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 7, 72(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB0_49
-; AIX32-NEXT: # %bb.50: # %atomicrmw.end
+; AIX32-NEXT: # %bb.50: # %atomicrmw.end1
; AIX32-NEXT: lwz 31, L..C9(2) # @s128
; AIX32-NEXT: addi 30, 1, 72
; AIX32-NEXT: addi 29, 1, 56
@@ -947,7 +947,7 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 6, 4(31)
; AIX32-NEXT: lwz 7, 0(31)
; AIX32-NEXT: .align 4
-; AIX32-NEXT: L..BB0_51: # %atomicrmw.start2
+; AIX32-NEXT: L..BB0_51: # %atomicrmw.start
; AIX32-NEXT: #
; AIX32-NEXT: xori 3, 5, 1
; AIX32-NEXT: stw 7, 72(1)
@@ -972,13 +972,13 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 7, 72(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB0_51
-; AIX32-NEXT: # %bb.52: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.52: # %atomicrmw.end
; AIX32-NEXT: li 29, 1
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: sync
; AIX32-NEXT: slw 18, 29, 26
; AIX32-NEXT: slw 3, 3, 26
-; AIX32-NEXT: L..BB0_53: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_53: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 28
; AIX32-NEXT: nand 5, 18, 4
@@ -987,13 +987,13 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 28
; AIX32-NEXT: bne 0, L..BB0_53
-; AIX32-NEXT: # %bb.54: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.54: # %atomicrmw.end
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: lwsync
; AIX32-NEXT: slw 17, 29, 24
; AIX32-NEXT: sync
; AIX32-NEXT: slw 3, 3, 24
-; AIX32-NEXT: L..BB0_55: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_55: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 27
; AIX32-NEXT: nand 5, 17, 4
@@ -1002,14 +1002,14 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 27
; AIX32-NEXT: bne 0, L..BB0_55
-; AIX32-NEXT: # %bb.56: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.56: # %atomicrmw.end
; AIX32-NEXT: li 3, 0
; AIX32-NEXT: lwsync
; AIX32-NEXT: slw 16, 29, 22
; AIX32-NEXT: sync
; AIX32-NEXT: ori 3, 3, 65535
; AIX32-NEXT: slw 3, 3, 22
-; AIX32-NEXT: L..BB0_57: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_57: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 25
; AIX32-NEXT: nand 5, 16, 4
@@ -1018,14 +1018,14 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 25
; AIX32-NEXT: bne 0, L..BB0_57
-; AIX32-NEXT: # %bb.58: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.58: # %atomicrmw.end
; AIX32-NEXT: li 3, 0
; AIX32-NEXT: lwsync
; AIX32-NEXT: slw 15, 29, 21
; AIX32-NEXT: sync
; AIX32-NEXT: ori 3, 3, 65535
; AIX32-NEXT: slw 3, 3, 21
-; AIX32-NEXT: L..BB0_59: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_59: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 23
; AIX32-NEXT: nand 5, 15, 4
@@ -1034,25 +1034,25 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 23
; AIX32-NEXT: bne 0, L..BB0_59
-; AIX32-NEXT: # %bb.60: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.60: # %atomicrmw.end
; AIX32-NEXT: lwsync
; AIX32-NEXT: sync
-; AIX32-NEXT: L..BB0_61: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_61: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 20
; AIX32-NEXT: nand 3, 29, 3
; AIX32-NEXT: stwcx. 3, 0, 20
; AIX32-NEXT: bne 0, L..BB0_61
-; AIX32-NEXT: # %bb.62: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.62: # %atomicrmw.end
; AIX32-NEXT: lwsync
; AIX32-NEXT: sync
-; AIX32-NEXT: L..BB0_63: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_63: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 19
; AIX32-NEXT: nand 3, 29, 3
; AIX32-NEXT: stwcx. 3, 0, 19
; AIX32-NEXT: bne 0, L..BB0_63
-; AIX32-NEXT: # %bb.64: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.64: # %atomicrmw.end
; AIX32-NEXT: lwz 31, L..C6(2) # @sll
; AIX32-NEXT: lwsync
; AIX32-NEXT: li 4, 0
@@ -1071,7 +1071,7 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: sync
; AIX32-NEXT: slw 3, 3, 26
-; AIX32-NEXT: L..BB0_65: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_65: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 28
; AIX32-NEXT: and 5, 18, 4
@@ -1080,12 +1080,12 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 28
; AIX32-NEXT: bne 0, L..BB0_65
-; AIX32-NEXT: # %bb.66: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.66: # %atomicrmw.end
; AIX32-NEXT: li 3, 255
; AIX32-NEXT: lwsync
; AIX32-NEXT: sync
; AIX32-NEXT: slw 3, 3, 24
-; AIX32-NEXT: L..BB0_67: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_67: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 27
; AIX32-NEXT: and 5, 17, 4
@@ -1094,13 +1094,13 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 27
; AIX32-NEXT: bne 0, L..BB0_67
-; AIX32-NEXT: # %bb.68: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.68: # %atomicrmw.end
; AIX32-NEXT: li 3, 0
; AIX32-NEXT: lwsync
; AIX32-NEXT: sync
; AIX32-NEXT: ori 3, 3, 65535
; AIX32-NEXT: slw 3, 3, 22
-; AIX32-NEXT: L..BB0_69: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_69: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 25
; AIX32-NEXT: and 5, 16, 4
@@ -1109,13 +1109,13 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 25
; AIX32-NEXT: bne 0, L..BB0_69
-; AIX32-NEXT: # %bb.70: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.70: # %atomicrmw.end
; AIX32-NEXT: li 3, 0
; AIX32-NEXT: lwsync
; AIX32-NEXT: sync
; AIX32-NEXT: ori 3, 3, 65535
; AIX32-NEXT: slw 3, 3, 21
-; AIX32-NEXT: L..BB0_71: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_71: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 23
; AIX32-NEXT: and 5, 15, 4
@@ -1124,25 +1124,25 @@ define dso_local void @test_op_ignore() local_unnamed_addr #0 {
; AIX32-NEXT: or 4, 5, 4
; AIX32-NEXT: stwcx. 4, 0, 23
; AIX32-NEXT: bne 0, L..BB0_71
-; AIX32-NEXT: # %bb.72: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.72: # %atomicrmw.end
; AIX32-NEXT: lwsync
; AIX32-NEXT: sync
-; AIX32-NEXT: L..BB0_73: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_73: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 20
; AIX32-NEXT: and 3, 29, 3
; AIX32-NEXT: stwcx. 3, 0, 20
; AIX32-NEXT: bne 0, L..BB0_73
-; AIX32-NEXT: # %bb.74: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.74: # %atomicrmw.end
; AIX32-NEXT: lwsync
; AIX32-NEXT: sync
-; AIX32-NEXT: L..BB0_75: # %atomicrmw.end1
+; AIX32-NEXT: L..BB0_75: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 19
; AIX32-NEXT: and 3, 29, 3
; AIX32-NEXT: stwcx. 3, 0, 19
; AIX32-NEXT: bne 0, L..BB0_75
-; AIX32-NEXT: # %bb.76: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.76: # %atomicrmw.end
; AIX32-NEXT: lwsync
; AIX32-NEXT: li 4, 0
; AIX32-NEXT: li 5, 1
@@ -3863,7 +3863,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 7, 0(29)
; AIX32-NEXT: stw 3, 4(30)
; AIX32-NEXT: .align 4
-; AIX32-NEXT: L..BB2_61: # %atomicrmw.start
+; AIX32-NEXT: L..BB2_61: # %atomicrmw.start2
; AIX32-NEXT: #
; AIX32-NEXT: and 3, 4, 23
; AIX32-NEXT: stw 7, 80(1)
@@ -3889,7 +3889,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 7, 80(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB2_61
-; AIX32-NEXT: # %bb.62: # %atomicrmw.end
+; AIX32-NEXT: # %bb.62: # %atomicrmw.end1
; AIX32-NEXT: and 3, 4, 23
; AIX32-NEXT: stw 17, 0(29)
; AIX32-NEXT: lbz 23, 0(26)
@@ -3905,7 +3905,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 6, 4(29)
; AIX32-NEXT: lwz 7, 0(29)
; AIX32-NEXT: .align 4
-; AIX32-NEXT: L..BB2_63: # %atomicrmw.start2
+; AIX32-NEXT: L..BB2_63: # %atomicrmw.start
; AIX32-NEXT: #
; AIX32-NEXT: and 3, 4, 23
; AIX32-NEXT: stw 7, 80(1)
@@ -3931,7 +3931,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: lwz 7, 80(1)
; AIX32-NEXT: cmplwi 3, 0
; AIX32-NEXT: beq 0, L..BB2_63
-; AIX32-NEXT: # %bb.64: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.64: # %atomicrmw.end
; AIX32-NEXT: and 3, 4, 23
; AIX32-NEXT: li 5, 255
; AIX32-NEXT: xor 3, 3, 17
@@ -3943,7 +3943,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: sync
; AIX32-NEXT: slw 4, 3, 24
-; AIX32-NEXT: L..BB2_65: # %atomicrmw.end1
+; AIX32-NEXT: L..BB2_65: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 6, 0, 22
; AIX32-NEXT: and 7, 4, 6
@@ -3952,7 +3952,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: or 7, 7, 8
; AIX32-NEXT: stwcx. 7, 0, 22
; AIX32-NEXT: bne 0, L..BB2_65
-; AIX32-NEXT: # %bb.66: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.66: # %atomicrmw.end
; AIX32-NEXT: srw 4, 6, 24
; AIX32-NEXT: lwsync
; AIX32-NEXT: li 5, 255
@@ -3965,7 +3965,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: sync
; AIX32-NEXT: lwz 9, 56(1) # 4-byte Folded Reload
; AIX32-NEXT: slw 4, 3, 21
-; AIX32-NEXT: L..BB2_67: # %atomicrmw.end1
+; AIX32-NEXT: L..BB2_67: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 6, 0, 19
; AIX32-NEXT: and 7, 4, 6
@@ -3974,7 +3974,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: or 7, 7, 8
; AIX32-NEXT: stwcx. 7, 0, 19
; AIX32-NEXT: bne 0, L..BB2_67
-; AIX32-NEXT: # %bb.68: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.68: # %atomicrmw.end
; AIX32-NEXT: srw 4, 6, 21
; AIX32-NEXT: li 5, 0
; AIX32-NEXT: lwsync
@@ -3985,7 +3985,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: stb 3, 0(26)
; AIX32-NEXT: sync
; AIX32-NEXT: slw 4, 3, 18
-; AIX32-NEXT: L..BB2_69: # %atomicrmw.end1
+; AIX32-NEXT: L..BB2_69: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 6, 0, 16
; AIX32-NEXT: and 7, 4, 6
@@ -3994,7 +3994,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: or 7, 7, 8
; AIX32-NEXT: stwcx. 7, 0, 16
; AIX32-NEXT: bne 0, L..BB2_69
-; AIX32-NEXT: # %bb.70: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.70: # %atomicrmw.end
; AIX32-NEXT: srw 4, 6, 18
; AIX32-NEXT: lwsync
; AIX32-NEXT: li 5, 0
@@ -4006,7 +4006,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: sync
; AIX32-NEXT: slw 4, 3, 15
-; AIX32-NEXT: L..BB2_71: # %atomicrmw.end1
+; AIX32-NEXT: L..BB2_71: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 6, 0, 14
; AIX32-NEXT: and 7, 4, 6
@@ -4015,7 +4015,7 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: or 7, 7, 8
; AIX32-NEXT: stwcx. 7, 0, 14
; AIX32-NEXT: bne 0, L..BB2_71
-; AIX32-NEXT: # %bb.72: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.72: # %atomicrmw.end
; AIX32-NEXT: srw 4, 6, 15
; AIX32-NEXT: lwsync
; AIX32-NEXT: clrlwi 4, 4, 16
@@ -4023,24 +4023,24 @@ define dso_local void @test_op_and_fetch() local_unnamed_addr #0 {
; AIX32-NEXT: sth 3, 0(20)
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: sync
-; AIX32-NEXT: L..BB2_73: # %atomicrmw.end1
+; AIX32-NEXT: L..BB2_73: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 13
; AIX32-NEXT: and 4, 3, 4
; AIX32-NEXT: stwcx. 4, 0, 13
; AIX32-NEXT: bne 0, L..BB2_73
-; AIX32-NEXT: # %bb.74: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.74: # %atomicrmw.end
; AIX32-NEXT: lwsync
; AIX32-NEXT: stw 4, 0(13)
; AIX32-NEXT: lbz 3, 0(26)
; AIX32-NEXT: sync
-; AIX32-NEXT: L..BB2_75: # %atomicrmw.end1
+; AIX32-NEXT: L..BB2_75: # %atomicrmw.end
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 25
; AIX32-NEXT: and 4, 3, 4
; AIX32-NEXT: stwcx. 4, 0, 25
; AIX32-NEXT: bne 0, L..BB2_75
-; AIX32-NEXT: # %bb.76: # %atomicrmw.end1
+; AIX32-NEXT: # %bb.76: # %atomicrmw.end
; AIX32-NEXT: lwsync
; AIX32-NEXT: stw 4, 0(25)
; AIX32-NEXT: li 4, 0
diff --git a/llvm/test/CodeGen/X86/atomic6432.ll b/llvm/test/CodeGen/X86/atomic6432.ll
index b0167f41a5fe24..8ff5f338e1482f 100644
--- a/llvm/test/CodeGen/X86/atomic6432.ll
+++ b/llvm/test/CodeGen/X86/atomic6432.ll
@@ -14,7 +14,7 @@ define void @atomic_fetch_add64() nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_1
-; X32-NEXT: .LBB0_1: # %atomicrmw.start
+; X32-NEXT: .LBB0_1: # %atomicrmw.start14
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -27,13 +27,13 @@ define void @atomic_fetch_add64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_1
; X32-NEXT: jmp .LBB0_2
-; X32-NEXT: .LBB0_2: # %atomicrmw.end
+; X32-NEXT: .LBB0_2: # %atomicrmw.end13
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_3
-; X32-NEXT: .LBB0_3: # %atomicrmw.start2
+; X32-NEXT: .LBB0_3: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -46,13 +46,13 @@ define void @atomic_fetch_add64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_3
; X32-NEXT: jmp .LBB0_4
-; X32-NEXT: .LBB0_4: # %atomicrmw.end1
+; X32-NEXT: .LBB0_4: # %atomicrmw.end7
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_5
-; X32-NEXT: .LBB0_5: # %atomicrmw.start8
+; X32-NEXT: .LBB0_5: # %atomicrmw.start2
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -67,13 +67,13 @@ define void @atomic_fetch_add64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_5
; X32-NEXT: jmp .LBB0_6
-; X32-NEXT: .LBB0_6: # %atomicrmw.end7
+; X32-NEXT: .LBB0_6: # %atomicrmw.end1
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB0_7
-; X32-NEXT: .LBB0_7: # %atomicrmw.start14
+; X32-NEXT: .LBB0_7: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl (%esp), %eax # 4-byte Reload
@@ -88,7 +88,7 @@ define void @atomic_fetch_add64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB0_7
; X32-NEXT: jmp .LBB0_8
-; X32-NEXT: .LBB0_8: # %atomicrmw.end13
+; X32-NEXT: .LBB0_8: # %atomicrmw.end
; X32-NEXT: addl $40, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
@@ -112,7 +112,7 @@ define void @atomic_fetch_sub64() nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_1
-; X32-NEXT: .LBB1_1: # %atomicrmw.start
+; X32-NEXT: .LBB1_1: # %atomicrmw.start14
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -125,13 +125,13 @@ define void @atomic_fetch_sub64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_1
; X32-NEXT: jmp .LBB1_2
-; X32-NEXT: .LBB1_2: # %atomicrmw.end
+; X32-NEXT: .LBB1_2: # %atomicrmw.end13
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_3
-; X32-NEXT: .LBB1_3: # %atomicrmw.start2
+; X32-NEXT: .LBB1_3: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -144,13 +144,13 @@ define void @atomic_fetch_sub64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_3
; X32-NEXT: jmp .LBB1_4
-; X32-NEXT: .LBB1_4: # %atomicrmw.end1
+; X32-NEXT: .LBB1_4: # %atomicrmw.end7
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_5
-; X32-NEXT: .LBB1_5: # %atomicrmw.start8
+; X32-NEXT: .LBB1_5: # %atomicrmw.start2
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -165,13 +165,13 @@ define void @atomic_fetch_sub64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_5
; X32-NEXT: jmp .LBB1_6
-; X32-NEXT: .LBB1_6: # %atomicrmw.end7
+; X32-NEXT: .LBB1_6: # %atomicrmw.end1
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB1_7
-; X32-NEXT: .LBB1_7: # %atomicrmw.start14
+; X32-NEXT: .LBB1_7: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl (%esp), %eax # 4-byte Reload
@@ -186,7 +186,7 @@ define void @atomic_fetch_sub64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB1_7
; X32-NEXT: jmp .LBB1_8
-; X32-NEXT: .LBB1_8: # %atomicrmw.end13
+; X32-NEXT: .LBB1_8: # %atomicrmw.end
; X32-NEXT: addl $40, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
@@ -209,7 +209,7 @@ define void @atomic_fetch_and64() nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB2_1
-; X32-NEXT: .LBB2_1: # %atomicrmw.start
+; X32-NEXT: .LBB2_1: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -221,7 +221,7 @@ define void @atomic_fetch_and64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB2_1
; X32-NEXT: jmp .LBB2_2
-; X32-NEXT: .LBB2_2: # %atomicrmw.end
+; X32-NEXT: .LBB2_2: # %atomicrmw.end7
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -248,7 +248,7 @@ define void @atomic_fetch_and64() nounwind {
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB2_5
-; X32-NEXT: .LBB2_5: # %atomicrmw.start8
+; X32-NEXT: .LBB2_5: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl (%esp), %eax # 4-byte Reload
@@ -263,7 +263,7 @@ define void @atomic_fetch_and64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB2_5
; X32-NEXT: jmp .LBB2_6
-; X32-NEXT: .LBB2_6: # %atomicrmw.end7
+; X32-NEXT: .LBB2_6: # %atomicrmw.end
; X32-NEXT: addl $32, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
@@ -285,7 +285,7 @@ define void @atomic_fetch_or64() nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB3_1
-; X32-NEXT: .LBB3_1: # %atomicrmw.start
+; X32-NEXT: .LBB3_1: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -297,7 +297,7 @@ define void @atomic_fetch_or64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB3_1
; X32-NEXT: jmp .LBB3_2
-; X32-NEXT: .LBB3_2: # %atomicrmw.end
+; X32-NEXT: .LBB3_2: # %atomicrmw.end7
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -324,7 +324,7 @@ define void @atomic_fetch_or64() nounwind {
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB3_5
-; X32-NEXT: .LBB3_5: # %atomicrmw.start8
+; X32-NEXT: .LBB3_5: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl (%esp), %eax # 4-byte Reload
@@ -339,7 +339,7 @@ define void @atomic_fetch_or64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB3_5
; X32-NEXT: jmp .LBB3_6
-; X32-NEXT: .LBB3_6: # %atomicrmw.end7
+; X32-NEXT: .LBB3_6: # %atomicrmw.end
; X32-NEXT: addl $32, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
@@ -361,7 +361,7 @@ define void @atomic_fetch_xor64() nounwind {
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB4_1
-; X32-NEXT: .LBB4_1: # %atomicrmw.start
+; X32-NEXT: .LBB4_1: # %atomicrmw.start8
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -373,7 +373,7 @@ define void @atomic_fetch_xor64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB4_1
; X32-NEXT: jmp .LBB4_2
-; X32-NEXT: .LBB4_2: # %atomicrmw.end
+; X32-NEXT: .LBB4_2: # %atomicrmw.end7
; X32-NEXT: movl sc64+4, %edx
; X32-NEXT: movl sc64, %eax
; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -400,7 +400,7 @@ define void @atomic_fetch_xor64() nounwind {
; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jmp .LBB4_5
-; X32-NEXT: .LBB4_5: # %atomicrmw.start8
+; X32-NEXT: .LBB4_5: # %atomicrmw.start
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X32-NEXT: movl (%esp), %eax # 4-byte Reload
@@ -415,7 +415,7 @@ define void @atomic_fetch_xor64() nounwind {
; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-NEXT: jne .LBB4_5
; X32-NEXT: jmp .LBB4_6
-; X32-NEXT: .LBB4_6: # %atomicrmw.end7
+; X32-NEXT: .LBB4_6: # %atomicrmw.end
; X32-NEXT: addl $32, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebx
diff --git a/llvm/test/CodeGen/X86/pr5145.ll b/llvm/test/CodeGen/X86/pr5145.ll
index da15bd6720ce24..16398cb57061ff 100644
--- a/llvm/test/CodeGen/X86/pr5145.ll
+++ b/llvm/test/CodeGen/X86/pr5145.ll
@@ -7,7 +7,7 @@ define void @atomic_maxmin_i8() {
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start14
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpb $6, %al
; CHECK-NEXT: movzbl %al, %eax
@@ -16,10 +16,10 @@ define void @atomic_maxmin_i8() {
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip)
; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end13
; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_3: # %atomicrmw.start2
+; CHECK-NEXT: .LBB0_3: # %atomicrmw.start8
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpb $7, %al
; CHECK-NEXT: movzbl %al, %eax
@@ -28,10 +28,10 @@ define void @atomic_maxmin_i8() {
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip)
; CHECK-NEXT: jne .LBB0_3
-; CHECK-NEXT: # %bb.4: # %atomicrmw.end1
+; CHECK-NEXT: # %bb.4: # %atomicrmw.end7
; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_5: # %atomicrmw.start8
+; CHECK-NEXT: .LBB0_5: # %atomicrmw.start2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpb $8, %al
; CHECK-NEXT: movzbl %al, %eax
@@ -40,10 +40,10 @@ define void @atomic_maxmin_i8() {
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip)
; CHECK-NEXT: jne .LBB0_5
-; CHECK-NEXT: # %bb.6: # %atomicrmw.end7
+; CHECK-NEXT: # %bb.6: # %atomicrmw.end1
; CHECK-NEXT: movzbl sc8(%rip), %eax
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_7: # %atomicrmw.start14
+; CHECK-NEXT: .LBB0_7: # %atomicrmw.start
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpb $9, %al
; CHECK-NEXT: movzbl %al, %eax
@@ -52,7 +52,7 @@ define void @atomic_maxmin_i8() {
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip)
; CHECK-NEXT: jne .LBB0_7
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end13
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: retq
%1 = atomicrmw max ptr @sc8, i8 5 acquire
%2 = atomicrmw min ptr @sc8, i8 6 acquire
diff --git a/llvm/test/CodeGen/X86/pr59305.ll b/llvm/test/CodeGen/X86/pr59305.ll
index d8738081842a33..46c9da5a51939d 100644
--- a/llvm/test/CodeGen/X86/pr59305.ll
+++ b/llvm/test/CodeGen/X86/pr59305.ll
@@ -81,17 +81,20 @@ define double @bar(double %0) #0 {
; X64-NEXT: #APP
; X64-NEXT: ldmxcsr 0
; X64-NEXT: #NO_APP
+; X64-NEXT: wait
; X64-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; X64-NEXT: movapd %xmm2, %xmm3
; X64-NEXT: divsd %xmm0, %xmm3
; X64-NEXT: #APP
; X64-NEXT: ldmxcsr 0
; X64-NEXT: #NO_APP
+; X64-NEXT: wait
; X64-NEXT: movapd %xmm2, %xmm1
; X64-NEXT: divsd %xmm0, %xmm1
; X64-NEXT: #APP
; X64-NEXT: ldmxcsr 0
; X64-NEXT: #NO_APP
+; X64-NEXT: wait
; X64-NEXT: divsd %xmm0, %xmm2
; X64-NEXT: movapd %xmm3, %xmm0
; X64-NEXT: callq fma@PLT
@@ -102,7 +105,6 @@ define double @bar(double %0) #0 {
; X86: # %bb.0:
; X86-NEXT: subl $28, %esp
; X86-NEXT: fldl {{[0-9]+}}(%esp)
-; X86-NEXT: wait
; X86-NEXT: #APP
; X86-NEXT: fldcw 0
; X86-NEXT: #NO_APP
More information about the llvm-commits
mailing list