[llvm] b577ec4 - [AtomicExpandPass][AArch64] Promote xchg with floating-point types to integer ones
via llvm-commits
llvm-commits at lists.llvm.org
Fri May 28 23:57:34 PDT 2021
Author: LemonBoy
Date: 2021-05-29T08:57:27+02:00
New Revision: b577ec495698c585837db3893c5662d3aa0aab87
URL: https://github.com/llvm/llvm-project/commit/b577ec495698c585837db3893c5662d3aa0aab87
DIFF: https://github.com/llvm/llvm-project/commit/b577ec495698c585837db3893c5662d3aa0aab87.diff
LOG: [AtomicExpandPass][AArch64] Promote xchg with floating-point types to integer ones
Follow the same strategy already used for atomic loads and stores: convert the operands to integer types of the same width, perform the exchange, and bitcast the result back to the original floating-point type.
This change prevents the atomic expansion pass from generating illegal LL/SC pairs when targeting AArch64: `expand-atomicrmw-xchg-fp.ll` would previously instantiate intrinsics such as `llvm.aarch64.ldaxr.p0f32` that cannot be lowered.
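For illustration, a minimal before/after sketch of the rewrite on a float xchg (value names are illustrative; the exact output is pinned down by the updated CHECK lines in the tests below):

    %res = atomicrmw xchg float* %ptr, float %val seq_cst

becomes

    %1 = bitcast float* %ptr to i32*
    %2 = bitcast float %val to i32
    %3 = atomicrmw xchg i32* %1, i32 %2 seq_cst, align 4
    %res = bitcast i32 %3 to float

The bitcasts only reinterpret the bit pattern, so the exchange is value-preserving, and the integer form maps directly onto the target's LL/SC or swap instructions.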
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D103232
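To exercise the transform in isolation, an invocation along these lines should work (a sketch; the authoritative RUN lines live in the test files below, and the legacy-pass-manager pass name is assumed here):

    opt -S -mtriple=aarch64-- -atomic-expand \
        llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll

End-to-end code generation is then covered by running llc on the new atomicrmw-xchg-fp.ll test, with and without -mattr=+lse.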
Added:
llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
Modified:
llvm/lib/CodeGen/AtomicExpandPass.cpp
llvm/test/CodeGen/X86/atomicf128.ll
llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 2b3b025d58a92..1474573bb41de 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -78,6 +78,7 @@ namespace {
StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
bool expandAtomicStore(StoreInst *SI);
bool tryExpandAtomicRMW(AtomicRMWInst *AI);
+ AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
Value *
insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
Align AddrAlign, AtomicOrdering MemOpOrder,
@@ -281,9 +282,18 @@ bool AtomicExpand::runOnFunction(Function &F) {
if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
MadeChange = true;
} else {
+ AtomicRMWInst::BinOp Op = RMWI->getOperation();
+ if (Op == AtomicRMWInst::Xchg &&
+ RMWI->getValOperand()->getType()->isFloatingPointTy()) {
+ // TODO: add a TLI hook to control this so that each target can be
+ // migrated to lowering the original type one at a time.
+ RMWI = convertAtomicXchgToIntegerType(RMWI);
+ assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
+ "invariant broken");
+ MadeChange = true;
+ }
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(RMWI);
- AtomicRMWInst::BinOp Op = RMWI->getOperation();
if (ValueSize < MinCASSize &&
(Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
Op == AtomicRMWInst::And)) {
@@ -363,6 +373,32 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
return NewLI;
}
+AtomicRMWInst *
+AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
+ auto *M = RMWI->getModule();
+ Type *NewTy =
+ getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
+
+ IRBuilder<> Builder(RMWI);
+
+ Value *Addr = RMWI->getPointerOperand();
+ Value *Val = RMWI->getValOperand();
+ Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
+ Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+ Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+
+ auto *NewRMWI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
+ RMWI->getAlign(), RMWI->getOrdering());
+ NewRMWI->setVolatile(RMWI->isVolatile());
+ LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
+
+ Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+ RMWI->replaceAllUsesWith(NewRVal);
+ RMWI->eraseFromParent();
+ return NewRMWI;
+}
+
bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
case TargetLoweringBase::AtomicExpansionKind::None:
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
new file mode 100644
index 0000000000000..52d8ce9bcdb3d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
+
+define half @test_rmw_xchg_f16(half* %dst, half %new) {
+; NOLSE-LABEL: test_rmw_xchg_f16:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: // kill: def $h0 killed $h0 def $s0
+; NOLSE-NEXT: fmov w8, s0
+; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT: ldaxrh w9, [x0]
+; NOLSE-NEXT: stlxrh w10, w8, [x0]
+; NOLSE-NEXT: cbnz w10, .LBB0_1
+; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT: fmov s0, w9
+; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_rmw_xchg_f16:
+; LSE: // %bb.0:
+; LSE-NEXT: // kill: def $h0 killed $h0 def $s0
+; LSE-NEXT: fmov w8, s0
+; LSE-NEXT: swpalh w8, w8, [x0]
+; LSE-NEXT: fmov s0, w8
+; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0
+; LSE-NEXT: ret
+ %res = atomicrmw xchg half* %dst, half %new seq_cst
+ ret half %res
+}
+
+define float @test_rmw_xchg_f32(float* %dst, float %new) {
+; NOLSE-LABEL: test_rmw_xchg_f32:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT: ldaxr w8, [x0]
+; NOLSE-NEXT: stlxr w10, w9, [x0]
+; NOLSE-NEXT: cbnz w10, .LBB1_1
+; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT: fmov s0, w8
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_rmw_xchg_f32:
+; LSE: // %bb.0:
+; LSE-NEXT: fmov w8, s0
+; LSE-NEXT: swpal w8, w8, [x0]
+; LSE-NEXT: fmov s0, w8
+; LSE-NEXT: ret
+ %res = atomicrmw xchg float* %dst, float %new seq_cst
+ ret float %res
+}
+
+define double @test_rmw_xchg_f64(double* %dst, double %new) {
+; NOLSE-LABEL: test_rmw_xchg_f64:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: fmov x8, d0
+; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT: ldaxr x9, [x0]
+; NOLSE-NEXT: stlxr w10, x8, [x0]
+; NOLSE-NEXT: cbnz w10, .LBB2_1
+; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT: fmov d0, x9
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_rmw_xchg_f64:
+; LSE: // %bb.0:
+; LSE-NEXT: fmov x8, d0
+; LSE-NEXT: swpal x8, x8, [x0]
+; LSE-NEXT: fmov d0, x8
+; LSE-NEXT: ret
+ %res = atomicrmw xchg double* %dst, double %new seq_cst
+ ret double %res
+}
+
+define fp128 @test_rmw_xchg_f128(fp128* %dst, fp128 %new) {
+; NOLSE-LABEL: test_rmw_xchg_f128:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: sub sp, sp, #32 // =32
+; NOLSE-NEXT: .cfi_def_cfa_offset 32
+; NOLSE-NEXT: str q0, [sp, #16]
+; NOLSE-NEXT: ldp x9, x8, [sp, #16]
+; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT: ldaxp x11, x10, [x0]
+; NOLSE-NEXT: stlxp w12, x9, x8, [x0]
+; NOLSE-NEXT: cbnz w12, .LBB3_1
+; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT: stp x11, x10, [sp]
+; NOLSE-NEXT: ldr q0, [sp], #32
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_rmw_xchg_f128:
+; LSE: // %bb.0:
+; LSE-NEXT: sub sp, sp, #32 // =32
+; LSE-NEXT: .cfi_def_cfa_offset 32
+; LSE-NEXT: str q0, [sp, #16]
+; LSE-NEXT: ldp x9, x8, [sp, #16]
+; LSE-NEXT: .LBB3_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: ldaxp x11, x10, [x0]
+; LSE-NEXT: stlxp w12, x9, x8, [x0]
+; LSE-NEXT: cbnz w12, .LBB3_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: stp x11, x10, [sp]
+; LSE-NEXT: ldr q0, [sp], #32
+; LSE-NEXT: ret
+ %res = atomicrmw xchg fp128* %dst, fp128 %new seq_cst
+ ret fp128 %res
+}
diff --git a/llvm/test/CodeGen/X86/atomicf128.ll b/llvm/test/CodeGen/X86/atomicf128.ll
index cbec96c1c88f0..1fc5a0eaf7327 100644
--- a/llvm/test/CodeGen/X86/atomicf128.ll
+++ b/llvm/test/CodeGen/X86/atomicf128.ll
@@ -10,21 +10,16 @@ define void @atomic_fetch_swapf128(fp128 %x) nounwind {
; CHECK-LABEL: atomic_fetch_swapf128:
; CHECK: ## %bb.0:
; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rbx
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movq _fsc128@{{.*}}(%rip), %rsi
-; CHECK-NEXT: movaps (%rsi), %xmm1
+; CHECK-NEXT: movq (%rsi), %rax
+; CHECK-NEXT: movq 8(%rsi), %rdx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_1: ## %atomicrmw.start
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rbx
-; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
-; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
-; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: lock cmpxchg16b (%rsi)
-; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
-; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1
; CHECK-NEXT: jne LBB0_1
; CHECK-NEXT: ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT: popq %rbx
diff --git a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
index 86b4cff469804..065878223e6a7 100644
--- a/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-fp.ll
@@ -4,21 +4,25 @@
define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
; CHECK-LABEL: @atomic_swap_f16(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[PTR:%.*]] to i16*
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast half [[VAL:%.*]] to i16
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f16(half* [[PTR:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i16
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP2]] to half
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast half [[VAL:%.*]] to i16
-; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f16(i64 [[TMP5]], half* [[PTR]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i16(i16* [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i16
+; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0i16(i64 [[TMP5]], i16* [[TMP1]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP4]] to half
; CHECK-NEXT: ret void
;
; OUTLINE-ATOMICS-LABEL: @atomic_swap_f16(
-; OUTLINE-ATOMICS-NEXT: [[T1:%.*]] = atomicrmw xchg half* [[PTR:%.*]], half [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT: [[TMP1:%.*]] = bitcast half* [[PTR:%.*]] to i16*
+; OUTLINE-ATOMICS-NEXT: [[TMP2:%.*]] = bitcast half [[VAL:%.*]] to i16
+; OUTLINE-ATOMICS-NEXT: [[TMP3:%.*]] = atomicrmw xchg i16* [[TMP1]], i16 [[TMP2]] acquire, align 2
+; OUTLINE-ATOMICS-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP3]] to half
; OUTLINE-ATOMICS-NEXT: ret void
;
%t1 = atomicrmw xchg half* %ptr, half %val acquire
@@ -27,21 +31,25 @@ define void @atomic_swap_f16(half* %ptr, half %val) nounwind {
define void @atomic_swap_f32(float* %ptr, float %val) nounwind {
; CHECK-LABEL: @atomic_swap_f32(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32*
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[VAL:%.*]] to i32
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f32(float* [[PTR:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast float [[VAL:%.*]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0f32(i64 [[TMP5]], float* [[PTR]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i32(i32* [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.aarch64.stxr.p0i32(i64 [[TMP5]], i32* [[TMP1]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP6]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP4]] to float
; CHECK-NEXT: ret void
;
; OUTLINE-ATOMICS-LABEL: @atomic_swap_f32(
-; OUTLINE-ATOMICS-NEXT: [[T1:%.*]] = atomicrmw xchg float* [[PTR:%.*]], float [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT: [[TMP1:%.*]] = bitcast float* [[PTR:%.*]] to i32*
+; OUTLINE-ATOMICS-NEXT: [[TMP2:%.*]] = bitcast float [[VAL:%.*]] to i32
+; OUTLINE-ATOMICS-NEXT: [[TMP3:%.*]] = atomicrmw xchg i32* [[TMP1]], i32 [[TMP2]] acquire, align 4
+; OUTLINE-ATOMICS-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; OUTLINE-ATOMICS-NEXT: ret void
;
%t1 = atomicrmw xchg float* %ptr, float %val acquire
@@ -50,19 +58,23 @@ define void @atomic_swap_f32(float* %ptr, float %val) nounwind {
define void @atomic_swap_f64(double* %ptr, double %val) nounwind {
; CHECK-LABEL: @atomic_swap_f64(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTR:%.*]] to i64*
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[VAL:%.*]] to i64
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.ldaxr.p0f64(double* [[PTR:%.*]])
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to double
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[VAL:%.*]] to i64
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0f64(i64 [[TMP3]], double* [[PTR]])
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[TMP2]], i64* [[TMP1]])
; CHECK-NEXT: [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
; CHECK: atomicrmw.end:
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64 [[TMP3]] to double
; CHECK-NEXT: ret void
;
; OUTLINE-ATOMICS-LABEL: @atomic_swap_f64(
-; OUTLINE-ATOMICS-NEXT: [[T1:%.*]] = atomicrmw xchg double* [[PTR:%.*]], double [[VAL:%.*]] acquire
+; OUTLINE-ATOMICS-NEXT: [[TMP1:%.*]] = bitcast double* [[PTR:%.*]] to i64*
+; OUTLINE-ATOMICS-NEXT: [[TMP2:%.*]] = bitcast double [[VAL:%.*]] to i64
+; OUTLINE-ATOMICS-NEXT: [[TMP3:%.*]] = atomicrmw xchg i64* [[TMP1]], i64 [[TMP2]] acquire, align 8
+; OUTLINE-ATOMICS-NEXT: [[TMP4:%.*]] = bitcast i64 [[TMP3]] to double
; OUTLINE-ATOMICS-NEXT: ret void
;
%t1 = atomicrmw xchg double* %ptr, double %val acquire
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
index 3389cf0c19deb..e7a540df986e5 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-fp.ll
@@ -3,19 +3,18 @@
define double @atomic_xchg_f64(double* %ptr) nounwind {
; CHECK-LABEL: @atomic_xchg_f64(
-; CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[PTR:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[PTR:%.*]] to i64*
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[PTR]] to i64*
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i64* [[TMP1]], i64 [[LOADED]], i64 4616189618054758400 seq_cst seq_cst, align 8
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret double [[TMP5]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT: ret double [[TMP4]]
;
%result = atomicrmw xchg double* %ptr, double 4.0 seq_cst
ret double %result
@@ -23,19 +22,18 @@ define double @atomic_xchg_f64(double* %ptr) nounwind {
define double @atomic_xchg_f64_as1(double addrspace(1)* %ptr) nounwind {
; CHECK-LABEL: @atomic_xchg_f64_as1(
-; CHECK-NEXT: [[TMP1:%.*]] = load double, double addrspace(1)* [[PTR:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast double addrspace(1)* [[PTR:%.*]] to i64 addrspace(1)*
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64 addrspace(1)* [[TMP1]], align 8
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi double [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast double addrspace(1)* [[PTR]] to i64 addrspace(1)*
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast double [[LOADED]] to i64
-; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg i64 addrspace(1)* [[TMP2]], i64 [[TMP3]], i64 4616189618054758400 seq_cst seq_cst
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT: [[LOADED:%.*]] = phi i64 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = cmpxchg i64 addrspace(1)* [[TMP1]], i64 [[LOADED]], i64 4616189618054758400 seq_cst seq_cst, align 8
+; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP3]], 1
+; CHECK-NEXT: [[NEWLOADED]] = extractvalue { i64, i1 } [[TMP3]], 0
; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
; CHECK: atomicrmw.end:
-; CHECK-NEXT: ret double [[TMP5]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i64 [[NEWLOADED]] to double
+; CHECK-NEXT: ret double [[TMP4]]
;
%result = atomicrmw xchg double addrspace(1)* %ptr, double 4.0 seq_cst
ret double %result