[llvm] 549e118 - [PowerPC] Support 16-byte lock free atomics on pwr8 and up
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 8 16:26:02 PDT 2022
Author: Kai Luo
Date: 2022-04-08T23:25:56Z
New Revision: 549e118e93c666914a1045fde38a2cac33e1e445
URL: https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445
DIFF: https://github.com/llvm/llvm-project/commit/549e118e93c666914a1045fde38a2cac33e1e445.diff
LOG: [PowerPC] Support 16-byte lock free atomics on pwr8 and up
Make 16-byte atomic types 16-byte aligned on PPC64, consistent with GCC. Also enable inlining of 16-byte atomics on non-AIX PPC64 targets.
Reviewed By: hubert.reinterpretcast
Differential Revision: https://reviews.llvm.org/D122377
Added:
clang/test/CodeGen/PowerPC/quadword-atomics.c
Modified:
clang/lib/Basic/Targets/PPC.cpp
clang/lib/Basic/Targets/PPC.h
clang/test/CodeGen/PowerPC/atomic-alignment.c
clang/test/Sema/atomic-ops.c
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/PowerPC/atomics-i128.ll
Removed:
################################################################################
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index bafcc23b38334..1f2f583b9462d 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -81,6 +81,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
IsISA3_0 = true;
} else if (Feature == "+isa-v31-instructions") {
IsISA3_1 = true;
+ } else if (Feature == "+quadword-atomics") {
+ HasQuadwordAtomics = true;
}
// TODO: Finish this list and add an assert that we've handled them
// all.
@@ -550,6 +552,12 @@ bool PPCTargetInfo::initFeatureMap(
Features["isa-v30-instructions"] =
llvm::StringSwitch<bool>(CPU).Case("pwr9", true).Default(false);
+ Features["quadword-atomics"] =
+ getTriple().isArch64Bit() && llvm::StringSwitch<bool>(CPU)
+ .Case("pwr9", true)
+ .Case("pwr8", true)
+ .Default(false);
+
// Power10 includes all the same features as Power9 plus any features specific
// to the Power10 core.
if (CPU == "pwr10" || CPU == "power10") {
@@ -660,6 +668,7 @@ bool PPCTargetInfo::hasFeature(StringRef Feature) const {
.Case("isa-v207-instructions", IsISA2_07)
.Case("isa-v30-instructions", IsISA3_0)
.Case("isa-v31-instructions", IsISA3_1)
+ .Case("quadword-atomics", HasQuadwordAtomics)
.Default(false);
}
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index ac52eb219f54d..44489d06307f2 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -78,6 +78,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
bool IsISA2_07 = false;
bool IsISA3_0 = false;
bool IsISA3_1 = false;
+ bool HasQuadwordAtomics = false;
protected:
std::string ABI;
@@ -439,8 +440,18 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
DataLayout += "-S128-v256:256:256-v512:512:512";
resetDataLayout(DataLayout);
- // PPC64 supports atomics up to 8 bytes.
- MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+ // Newer PPC64 instruction sets support atomics up to 16 bytes.
+ MaxAtomicPromoteWidth = 128;
+ // Baseline PPC64 supports inlining atomics up to 8 bytes.
+ MaxAtomicInlineWidth = 64;
+ }
+
+ void setMaxAtomicWidth() override {
+ // For power8 and up, backend is able to inline 16-byte atomic lock free
+ // code.
+ // TODO: We should allow AIX to inline quadword atomics in the future.
+ if (!getTriple().isOSAIX() && hasFeature("quadword-atomics"))
+ MaxAtomicInlineWidth = 128;
}
BuiltinVaListKind getBuiltinVaListKind() const override {
diff --git a/clang/test/CodeGen/PowerPC/atomic-alignment.c b/clang/test/CodeGen/PowerPC/atomic-alignment.c
index cd6985962c39e..537ba1a95c048 100644
--- a/clang/test/CodeGen/PowerPC/atomic-alignment.c
+++ b/clang/test/CodeGen/PowerPC/atomic-alignment.c
@@ -1,25 +1,30 @@
-// RUN: %clang_cc1 -verify -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
+// RUN: %clang_cc1 -Werror -triple powerpc-unknown-unknown -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC,PPC32
-// RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
-// RUN: FileCheck %s --check-prefixes=PPC,PPC64
-// RUN: %clang_cc1 -verify -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
+// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \
// RUN: FileCheck %s --check-prefixes=PPC,PPC64
+// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s \
+// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,PPC64
+// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s | \
+// RUN: FileCheck %s --check-prefixes=PPC,AIX64
+// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s \
+// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,AIX64
// PPC: @c = global i8 0, align 1{{$}}
-_Atomic(char) c; // expected-no-diagnostics
+_Atomic(char) c;
// PPC: @s = global i16 0, align 2{{$}}
-_Atomic(short) s; // expected-no-diagnostics
+_Atomic(short) s;
// PPC: @i = global i32 0, align 4{{$}}
-_Atomic(int) i; // expected-no-diagnostics
+_Atomic(int) i;
// PPC32: @l = global i32 0, align 4{{$}}
// PPC64: @l = global i64 0, align 8{{$}}
-_Atomic(long) l; // expected-no-diagnostics
+// AIX64: @l = global i64 0, align 8{{$}}
+_Atomic(long) l;
// PPC: @ll = global i64 0, align 8{{$}}
-_Atomic(long long) ll; // expected-no-diagnostics
+_Atomic(long long) ll;
typedef struct {
char x[8];
@@ -27,11 +32,14 @@ typedef struct {
// PPC32: @o = global %struct.O zeroinitializer, align 1{{$}}
// PPC64: @o = global %struct.O zeroinitializer, align 8{{$}}
-_Atomic(O) o; // expected-no-diagnostics
+// AIX64: @o = global %struct.O zeroinitializer, align 8{{$}}
+_Atomic(O) o;
typedef struct {
char x[16];
} Q;
-// PPC: @q = global %struct.Q zeroinitializer, align 1{{$}}
-_Atomic(Q) q; // expected-no-diagnostics
+// PPC32: @q = global %struct.Q zeroinitializer, align 1{{$}}
+// PPC64: @q = global %struct.Q zeroinitializer, align 16{{$}}
+// AIX64: @q = global %struct.Q zeroinitializer, align 16{{$}}
+_Atomic(Q) q;
diff --git a/clang/test/CodeGen/PowerPC/quadword-atomics.c b/clang/test/CodeGen/PowerPC/quadword-atomics.c
new file mode 100644
index 0000000000000..b1da89c2785b7
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/quadword-atomics.c
@@ -0,0 +1,92 @@
+// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
+// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-PWR8
+// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
+// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
+// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
+// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
+
+typedef struct {
+ char x[16];
+} Q;
+
+typedef _Atomic(Q) AtomicQ;
+
+typedef __int128_t int128_t;
+
+// PPC64-PWR8-LABEL: @test_load(
+// PPC64-PWR8: [[TMP3:%.*]] = load atomic i128, i128* [[TMP1:%.*]] acquire, align 16
+//
+// PPC64-LABEL: @test_load(
+// PPC64: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP3:%.*]], i8* noundef [[TMP4:%.*]], i32 noundef signext 2)
+//
+Q test_load(AtomicQ *ptr) {
+ // expected-no-diagnostics
+ return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
+}
+
+// PPC64-PWR8-LABEL: @test_store(
+// PPC64-PWR8: store atomic i128 [[TMP6:%.*]], i128* [[TMP4:%.*]] release, align 16
+//
+// PPC64-LABEL: @test_store(
+// PPC64: call void @__atomic_store(i64 noundef 16, i8* noundef [[TMP6:%.*]], i8* noundef [[TMP7:%.*]], i32 noundef signext 3)
+//
+void test_store(Q val, AtomicQ *ptr) {
+ // expected-no-diagnostics
+ __c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
+}
+
+// PPC64-PWR8-LABEL: @test_add(
+// PPC64-PWR8: [[TMP3:%.*]] = atomicrmw add i128* [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
+//
+// PPC64-LABEL: @test_add(
+// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(i8* noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
+//
+void test_add(_Atomic(int128_t) *ptr, int128_t x) {
+ // expected-no-diagnostics
+ __c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
+}
+
+// PPC64-PWR8-LABEL: @test_xchg(
+// PPC64-PWR8: [[TMP8:%.*]] = atomicrmw xchg i128* [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
+//
+// PPC64-LABEL: @test_xchg(
+// PPC64: call void @__atomic_exchange(i64 noundef 16, i8* noundef [[TMP7:%.*]], i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i32 noundef signext 5)
+//
+Q test_xchg(AtomicQ *ptr, Q new) {
+ // expected-no-diagnostics
+ return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
+}
+
+// PPC64-PWR8-LABEL: @test_cmpxchg(
+// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
+//
+// PPC64-LABEL: @test_cmpxchg(
+// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
+//
+int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
+ // expected-no-diagnostics
+ return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+}
+
+// PPC64-PWR8-LABEL: @test_cmpxchg_weak(
+// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg weak i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
+//
+// PPC64-LABEL: @test_cmpxchg_weak(
+// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
+//
+int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
+ // expected-no-diagnostics
+ return __c11_atomic_compare_exchange_weak(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+}
+
+// PPC64-PWR8-LABEL: @is_lock_free(
+// PPC64-PWR8: ret i32 1
+//
+// PPC64-LABEL: @is_lock_free(
+// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, i8* noundef null)
+//
+int is_lock_free() {
+ AtomicQ q;
+ // expected-no-diagnostics
+ return __c11_atomic_is_lock_free(sizeof(q));
+}
diff --git a/clang/test/Sema/atomic-ops.c b/clang/test/Sema/atomic-ops.c
index a3c156d6663b9..3ad469d337433 100644
--- a/clang/test/Sema/atomic-ops.c
+++ b/clang/test/Sema/atomic-ops.c
@@ -9,7 +9,7 @@
// RUN: -target-cpu pwr7
// RUN: %clang_cc1 %s -verify -fgnuc-version=4.2.1 -ffreestanding \
// RUN: -fsyntax-only -triple=powerpc64le-linux-gnu -std=c11 \
-// RUN: -target-cpu pwr8
+// RUN: -target-cpu pwr8 -DPPC64_PWR8
// Basic parsing/Sema tests for __c11_atomic_*
@@ -47,7 +47,11 @@ _Static_assert(__c11_atomic_is_lock_free(2), "");
_Static_assert(__c11_atomic_is_lock_free(3), ""); // expected-error {{not an integral constant expression}}
_Static_assert(__c11_atomic_is_lock_free(4), "");
_Static_assert(__c11_atomic_is_lock_free(8), "");
+#ifndef PPC64_PWR8
_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-error {{not an integral constant expression}}
+#else
+_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-no-error
+#endif
_Static_assert(__c11_atomic_is_lock_free(17), ""); // expected-error {{not an integral constant expression}}
_Static_assert(__atomic_is_lock_free(1, 0), "");
@@ -55,15 +59,23 @@ _Static_assert(__atomic_is_lock_free(2, 0), "");
_Static_assert(__atomic_is_lock_free(3, 0), ""); // expected-error {{not an integral constant expression}}
_Static_assert(__atomic_is_lock_free(4, 0), "");
_Static_assert(__atomic_is_lock_free(8, 0), "");
+#ifndef PPC64_PWR8
_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-error {{not an integral constant expression}}
+#else
+_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-no-error
+#endif
_Static_assert(__atomic_is_lock_free(17, 0), ""); // expected-error {{not an integral constant expression}}
_Static_assert(atomic_is_lock_free((atomic_char*)0), "");
_Static_assert(atomic_is_lock_free((atomic_short*)0), "");
_Static_assert(atomic_is_lock_free((atomic_int*)0), "");
_Static_assert(atomic_is_lock_free((atomic_long*)0), "");
+#ifndef PPC64_PWR8
// noi128-error at +1 {{__int128 is not supported on this target}}
_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-error {{not an integral constant expression}}
+#else
+_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-no-error
+#endif
_Static_assert(atomic_is_lock_free(0 + (atomic_char*)0), "");
char i8;
@@ -88,7 +100,11 @@ _Static_assert(__atomic_always_lock_free(2, 0), "");
_Static_assert(!__atomic_always_lock_free(3, 0), "");
_Static_assert(__atomic_always_lock_free(4, 0), "");
_Static_assert(__atomic_always_lock_free(8, 0), "");
+#ifndef PPC64_PWR8
_Static_assert(!__atomic_always_lock_free(16, 0), "");
+#else
+_Static_assert(__atomic_always_lock_free(16, 0), "");
+#endif
_Static_assert(!__atomic_always_lock_free(17, 0), "");
_Static_assert(__atomic_always_lock_free(1, incomplete), "");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b126ed486b0d6..3a04faf3e685a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1321,7 +1321,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
+ if (shouldInlineQuadwordAtomics()) {
setMaxAtomicSizeInBitsSupported(128);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -18053,10 +18053,18 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
}
}
+bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
+ // TODO: 16-byte atomic type support for AIX is in progress; we should be able
+ // to inline 16-byte atomic ops on AIX too in the future.
+ return Subtarget.isPPC64() &&
+ (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
+ Subtarget.hasQuadwordAtomics();
+}
+
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
+ if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicRMWInIR(AI);
}
@@ -18064,7 +18072,7 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
+ if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}
@@ -18094,8 +18102,7 @@ getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
- assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
- "Only support quadword now");
+ assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = Incr->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
@@ -18119,8 +18126,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
- assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
- "Only support quadword now");
+ assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Type *ValTy = CmpVal->getType();
assert(ValTy->getPrimitiveSizeInBits() == 128);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 826d26ce85631..f92a117fe27fd 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -910,6 +910,8 @@ namespace llvm {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
+ bool shouldInlineQuadwordAtomics() const;
+
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
index 95a2eb5df45ec..62b69aed56b1c 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -5,6 +5,22 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \
; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN: --check-prefix=LE-PWR8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN: --check-prefix=LE-PWR8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \
+; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN: --check-prefix=AIX64-PWR8 %s
+
; On 32-bit PPC platforms, 16-byte lock-free atomic instructions are not
; available, so inlined lock-free code is not expected to be generated on such
; platforms, even if the arch level is pwr8 or later and `-ppc-quadword-atomics` is on.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \
+; RUN: -ppc-quadword-atomics -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \
+; RUN: | FileCheck --check-prefix=PPC-PWR8 %s
define i128 @swap(i128* %a, i128 %x) {
@@ -39,6 +55,62 @@ define i128 @swap(i128* %a, i128 %x) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: swap:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB0_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r6, 0, r3
+; LE-PWR8-NEXT: mr r9, r4
+; LE-PWR8-NEXT: mr r8, r5
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB0_1
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r7
+; LE-PWR8-NEXT: mr r4, r6
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: swap:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_lock_test_and_set_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: swap:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: bl __atomic_exchange
+; PPC-PWR8-NEXT: lwz r6, 28(r1)
+; PPC-PWR8-NEXT: lwz r5, 24(r1)
+; PPC-PWR8-NEXT: lwz r4, 20(r1)
+; PPC-PWR8-NEXT: lwz r3, 16(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = atomicrmw xchg i128* %a, i128 %x seq_cst, align 16
ret i128 %0
@@ -76,6 +148,109 @@ define i128 @add(i128* %a, i128 %x) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: add:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB1_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r6, 0, r3
+; LE-PWR8-NEXT: addc r9, r4, r7
+; LE-PWR8-NEXT: adde r8, r5, r6
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB1_1
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r7
+; LE-PWR8-NEXT: mr r4, r6
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: add:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_fetch_and_add_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: add:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r24, -32
+; PPC-PWR8-NEXT: .cfi_offset r25, -28
+; PPC-PWR8-NEXT: .cfi_offset r26, -24
+; PPC-PWR8-NEXT: .cfi_offset r27, -20
+; PPC-PWR8-NEXT: .cfi_offset r28, -16
+; PPC-PWR8-NEXT: .cfi_offset r29, -12
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r27, r5
+; PPC-PWR8-NEXT: mr r26, r3
+; PPC-PWR8-NEXT: lwz r5, 8(r3)
+; PPC-PWR8-NEXT: lwz r4, 4(r3)
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r28, r6
+; PPC-PWR8-NEXT: lwz r6, 12(r3)
+; PPC-PWR8-NEXT: lwz r3, 0(r3)
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: .p2align 4
+; PPC-PWR8-NEXT: .LBB1_1: # %atomicrmw.start
+; PPC-PWR8-NEXT: #
+; PPC-PWR8-NEXT: addc r7, r6, r30
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: adde r8, r5, r29
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: adde r4, r4, r28
+; PPC-PWR8-NEXT: stw r7, 28(r1)
+; PPC-PWR8-NEXT: stw r8, 24(r1)
+; PPC-PWR8-NEXT: mr r6, r24
+; PPC-PWR8-NEXT: adde r3, r3, r27
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r8, 5
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: cmplwi r3, 0
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: beq cr0, .LBB1_1
+; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = atomicrmw add i128* %a, i128 %x seq_cst, align 16
ret i128 %0
@@ -113,6 +288,109 @@ define i128 @sub(i128* %a, i128 %x) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: sub:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB2_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r6, 0, r3
+; LE-PWR8-NEXT: subc r9, r7, r4
+; LE-PWR8-NEXT: subfe r8, r5, r6
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB2_1
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r7
+; LE-PWR8-NEXT: mr r4, r6
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: sub:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_fetch_and_sub_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: sub:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r24, -32
+; PPC-PWR8-NEXT: .cfi_offset r25, -28
+; PPC-PWR8-NEXT: .cfi_offset r26, -24
+; PPC-PWR8-NEXT: .cfi_offset r27, -20
+; PPC-PWR8-NEXT: .cfi_offset r28, -16
+; PPC-PWR8-NEXT: .cfi_offset r29, -12
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r27, r5
+; PPC-PWR8-NEXT: mr r26, r3
+; PPC-PWR8-NEXT: lwz r5, 8(r3)
+; PPC-PWR8-NEXT: lwz r4, 4(r3)
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r28, r6
+; PPC-PWR8-NEXT: lwz r6, 12(r3)
+; PPC-PWR8-NEXT: lwz r3, 0(r3)
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: .p2align 4
+; PPC-PWR8-NEXT: .LBB2_1: # %atomicrmw.start
+; PPC-PWR8-NEXT: #
+; PPC-PWR8-NEXT: subc r7, r6, r30
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: subfe r8, r29, r5
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: subfe r4, r28, r4
+; PPC-PWR8-NEXT: stw r7, 28(r1)
+; PPC-PWR8-NEXT: stw r8, 24(r1)
+; PPC-PWR8-NEXT: mr r6, r24
+; PPC-PWR8-NEXT: subfe r3, r27, r3
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: li r8, 5
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: cmplwi r3, 0
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: beq cr0, .LBB2_1
+; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = atomicrmw sub i128* %a, i128 %x seq_cst, align 16
ret i128 %0
@@ -150,6 +428,109 @@ define i128 @and(i128* %a, i128 %x) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: and:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB3_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r6, 0, r3
+; LE-PWR8-NEXT: and r9, r4, r7
+; LE-PWR8-NEXT: and r8, r5, r6
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB3_1
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r7
+; LE-PWR8-NEXT: mr r4, r6
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: and:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_fetch_and_and_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: and:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r24, -32
+; PPC-PWR8-NEXT: .cfi_offset r25, -28
+; PPC-PWR8-NEXT: .cfi_offset r26, -24
+; PPC-PWR8-NEXT: .cfi_offset r27, -20
+; PPC-PWR8-NEXT: .cfi_offset r28, -16
+; PPC-PWR8-NEXT: .cfi_offset r29, -12
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r27, r5
+; PPC-PWR8-NEXT: mr r26, r3
+; PPC-PWR8-NEXT: lwz r5, 8(r3)
+; PPC-PWR8-NEXT: lwz r4, 4(r3)
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r28, r6
+; PPC-PWR8-NEXT: lwz r6, 12(r3)
+; PPC-PWR8-NEXT: lwz r3, 0(r3)
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: .p2align 4
+; PPC-PWR8-NEXT: .LBB3_1: # %atomicrmw.start
+; PPC-PWR8-NEXT: #
+; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: and r7, r5, r29
+; PPC-PWR8-NEXT: and r8, r6, r30
+; PPC-PWR8-NEXT: and r3, r3, r27
+; PPC-PWR8-NEXT: and r4, r4, r28
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: mr r6, r24
+; PPC-PWR8-NEXT: stw r8, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: li r8, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: cmplwi r3, 0
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: beq cr0, .LBB3_1
+; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = atomicrmw and i128* %a, i128 %x seq_cst, align 16
ret i128 %0
@@ -187,6 +568,109 @@ define i128 @or(i128* %a, i128 %x) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: or:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB4_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r6, 0, r3
+; LE-PWR8-NEXT: or r9, r4, r7
+; LE-PWR8-NEXT: or r8, r5, r6
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB4_1
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r7
+; LE-PWR8-NEXT: mr r4, r6
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: or:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_fetch_and_or_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: or:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r24, -32
+; PPC-PWR8-NEXT: .cfi_offset r25, -28
+; PPC-PWR8-NEXT: .cfi_offset r26, -24
+; PPC-PWR8-NEXT: .cfi_offset r27, -20
+; PPC-PWR8-NEXT: .cfi_offset r28, -16
+; PPC-PWR8-NEXT: .cfi_offset r29, -12
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r27, r5
+; PPC-PWR8-NEXT: mr r26, r3
+; PPC-PWR8-NEXT: lwz r5, 8(r3)
+; PPC-PWR8-NEXT: lwz r4, 4(r3)
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r28, r6
+; PPC-PWR8-NEXT: lwz r6, 12(r3)
+; PPC-PWR8-NEXT: lwz r3, 0(r3)
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: .p2align 4
+; PPC-PWR8-NEXT: .LBB4_1: # %atomicrmw.start
+; PPC-PWR8-NEXT: #
+; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: or r7, r5, r29
+; PPC-PWR8-NEXT: or r8, r6, r30
+; PPC-PWR8-NEXT: or r3, r3, r27
+; PPC-PWR8-NEXT: or r4, r4, r28
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: mr r6, r24
+; PPC-PWR8-NEXT: stw r8, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: li r8, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: cmplwi r3, 0
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: beq cr0, .LBB4_1
+; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = atomicrmw or i128* %a, i128 %x seq_cst, align 16
ret i128 %0
@@ -224,6 +708,109 @@ define i128 @xor(i128* %a, i128 %x) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: xor:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB5_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r6, 0, r3
+; LE-PWR8-NEXT: xor r9, r4, r7
+; LE-PWR8-NEXT: xor r8, r5, r6
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB5_1
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r7
+; LE-PWR8-NEXT: mr r4, r6
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: xor:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_fetch_and_xor_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: xor:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r24, -32
+; PPC-PWR8-NEXT: .cfi_offset r25, -28
+; PPC-PWR8-NEXT: .cfi_offset r26, -24
+; PPC-PWR8-NEXT: .cfi_offset r27, -20
+; PPC-PWR8-NEXT: .cfi_offset r28, -16
+; PPC-PWR8-NEXT: .cfi_offset r29, -12
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r27, r5
+; PPC-PWR8-NEXT: mr r26, r3
+; PPC-PWR8-NEXT: lwz r5, 8(r3)
+; PPC-PWR8-NEXT: lwz r4, 4(r3)
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r28, r6
+; PPC-PWR8-NEXT: lwz r6, 12(r3)
+; PPC-PWR8-NEXT: lwz r3, 0(r3)
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: .p2align 4
+; PPC-PWR8-NEXT: .LBB5_1: # %atomicrmw.start
+; PPC-PWR8-NEXT: #
+; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: xor r7, r5, r29
+; PPC-PWR8-NEXT: xor r8, r6, r30
+; PPC-PWR8-NEXT: xor r3, r3, r27
+; PPC-PWR8-NEXT: xor r4, r4, r28
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: mr r6, r24
+; PPC-PWR8-NEXT: stw r8, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: li r8, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: cmplwi r3, 0
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: beq cr0, .LBB5_1
+; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = atomicrmw xor i128* %a, i128 %x seq_cst, align 16
ret i128 %0
@@ -261,6 +848,109 @@ define i128 @nand(i128* %a, i128 %x) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: nand:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB6_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r6, 0, r3
+; LE-PWR8-NEXT: nand r9, r4, r7
+; LE-PWR8-NEXT: nand r8, r5, r6
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB6_1
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r7
+; LE-PWR8-NEXT: mr r4, r6
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: nand:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_fetch_and_nand_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: nand:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -80(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: .cfi_offset r24, -32
+; PPC-PWR8-NEXT: .cfi_offset r25, -28
+; PPC-PWR8-NEXT: .cfi_offset r26, -24
+; PPC-PWR8-NEXT: .cfi_offset r27, -20
+; PPC-PWR8-NEXT: .cfi_offset r28, -16
+; PPC-PWR8-NEXT: .cfi_offset r29, -12
+; PPC-PWR8-NEXT: .cfi_offset r30, -8
+; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r27, r5
+; PPC-PWR8-NEXT: mr r26, r3
+; PPC-PWR8-NEXT: lwz r5, 8(r3)
+; PPC-PWR8-NEXT: lwz r4, 4(r3)
+; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r28, r6
+; PPC-PWR8-NEXT: lwz r6, 12(r3)
+; PPC-PWR8-NEXT: lwz r3, 0(r3)
+; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: addi r24, r1, 16
+; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r29, r7
+; PPC-PWR8-NEXT: addi r25, r1, 32
+; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
+; PPC-PWR8-NEXT: mr r30, r8
+; PPC-PWR8-NEXT: .p2align 4
+; PPC-PWR8-NEXT: .LBB6_1: # %atomicrmw.start
+; PPC-PWR8-NEXT: #
+; PPC-PWR8-NEXT: stw r3, 32(r1)
+; PPC-PWR8-NEXT: stw r4, 36(r1)
+; PPC-PWR8-NEXT: nand r7, r5, r29
+; PPC-PWR8-NEXT: nand r8, r6, r30
+; PPC-PWR8-NEXT: nand r3, r3, r27
+; PPC-PWR8-NEXT: nand r4, r4, r28
+; PPC-PWR8-NEXT: stw r5, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 44(r1)
+; PPC-PWR8-NEXT: mr r5, r25
+; PPC-PWR8-NEXT: mr r6, r24
+; PPC-PWR8-NEXT: stw r8, 28(r1)
+; PPC-PWR8-NEXT: stw r7, 24(r1)
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: li r8, 5
+; PPC-PWR8-NEXT: stw r4, 20(r1)
+; PPC-PWR8-NEXT: stw r3, 16(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: mr r4, r26
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: cmplwi r3, 0
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: beq cr0, .LBB6_1
+; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end
+; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload
+; PPC-PWR8-NEXT: lwz r0, 84(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 80
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = atomicrmw nand i128* %a, i128 %x seq_cst, align 16
ret i128 %0
@@ -306,6 +996,76 @@ define i128 @cas_weak_acquire_acquire(i128* %a, i128 %cmp, i128 %new) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: cas_weak_acquire_acquire:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: .LBB7_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r8, 0, r3
+; LE-PWR8-NEXT: xor r11, r9, r4
+; LE-PWR8-NEXT: xor r10, r8, r5
+; LE-PWR8-NEXT: or. r11, r11, r10
+; LE-PWR8-NEXT: bne cr0, .LBB7_3
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: mr r11, r6
+; LE-PWR8-NEXT: mr r10, r7
+; LE-PWR8-NEXT: stqcx. r10, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB7_1
+; LE-PWR8-NEXT: b .LBB7_4
+; LE-PWR8-NEXT: .LBB7_3: # %entry
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: .LBB7_4: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r9
+; LE-PWR8-NEXT: mr r4, r8
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: cas_weak_acquire_acquire:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: cas_weak_acquire_acquire:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: lwz r11, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: li r7, 2
+; PPC-PWR8-NEXT: li r8, 2
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: stw r11, 28(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new acquire acquire
%1 = extractvalue { i128, i1 } %0, 0
@@ -351,6 +1111,76 @@ define i128 @cas_weak_release_monotonic(i128* %a, i128 %cmp, i128 %new) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: cas_weak_release_monotonic:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: .LBB8_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r8, 0, r3
+; LE-PWR8-NEXT: xor r11, r9, r4
+; LE-PWR8-NEXT: xor r10, r8, r5
+; LE-PWR8-NEXT: or. r11, r11, r10
+; LE-PWR8-NEXT: bne cr0, .LBB8_3
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: mr r11, r6
+; LE-PWR8-NEXT: mr r10, r7
+; LE-PWR8-NEXT: stqcx. r10, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB8_1
+; LE-PWR8-NEXT: b .LBB8_4
+; LE-PWR8-NEXT: .LBB8_3: # %entry
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: .LBB8_4: # %entry
+; LE-PWR8-NEXT: mr r3, r9
+; LE-PWR8-NEXT: mr r4, r8
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: cas_weak_release_monotonic:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: cas_weak_release_monotonic:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: lwz r11, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: li r7, 3
+; PPC-PWR8-NEXT: li r8, 0
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: stw r11, 28(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new release monotonic
%1 = extractvalue { i128, i1 } %0, 0
@@ -398,6 +1228,78 @@ define i128 @cas_sc_sc(i128* %a, i128 %cmp, i128 %new) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: cas_sc_sc:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: sync
+; LE-PWR8-NEXT: .LBB9_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r8, 0, r3
+; LE-PWR8-NEXT: xor r11, r9, r4
+; LE-PWR8-NEXT: xor r10, r8, r5
+; LE-PWR8-NEXT: or. r11, r11, r10
+; LE-PWR8-NEXT: bne cr0, .LBB9_3
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: mr r11, r6
+; LE-PWR8-NEXT: mr r10, r7
+; LE-PWR8-NEXT: stqcx. r10, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB9_1
+; LE-PWR8-NEXT: b .LBB9_4
+; LE-PWR8-NEXT: .LBB9_3: # %entry
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: .LBB9_4: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r9
+; LE-PWR8-NEXT: mr r4, r8
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: cas_sc_sc:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: sync
+; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: cas_sc_sc:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: lwz r11, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: li r7, 5
+; PPC-PWR8-NEXT: li r8, 5
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: stw r11, 28(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = cmpxchg i128* %a, i128 %cmp, i128 %new seq_cst seq_cst
%1 = extractvalue { i128, i1 } %0, 0
@@ -445,6 +1347,78 @@ define i128 @cas_acqrel_acquire(i128* %a, i128 %cmp, i128 %new) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: cas_acqrel_acquire:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: .LBB10_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r8, 0, r3
+; LE-PWR8-NEXT: xor r11, r9, r4
+; LE-PWR8-NEXT: xor r10, r8, r5
+; LE-PWR8-NEXT: or. r11, r11, r10
+; LE-PWR8-NEXT: bne cr0, .LBB10_3
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: mr r11, r6
+; LE-PWR8-NEXT: mr r10, r7
+; LE-PWR8-NEXT: stqcx. r10, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB10_1
+; LE-PWR8-NEXT: b .LBB10_4
+; LE-PWR8-NEXT: .LBB10_3: # %entry
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: .LBB10_4: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: mr r3, r9
+; LE-PWR8-NEXT: mr r4, r8
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: cas_acqrel_acquire:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -112(r1)
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: cas_acqrel_acquire:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: lwz r11, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: li r7, 4
+; PPC-PWR8-NEXT: li r8, 2
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: stw r11, 28(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: lwz r6, 44(r1)
+; PPC-PWR8-NEXT: lwz r5, 40(r1)
+; PPC-PWR8-NEXT: lwz r4, 36(r1)
+; PPC-PWR8-NEXT: lwz r3, 32(r1)
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = cmpxchg i128* %a, i128 %cmp, i128 %new acq_rel acquire
%1 = extractvalue { i128, i1 } %0, 0
@@ -508,6 +1482,88 @@ define i1 @cas_acqrel_acquire_check_succ(i128* %a, i128 %cmp, i128 %new) {
; PWR7-NEXT: ld r0, 16(r1)
; PWR7-NEXT: mtlr r0
; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: cas_acqrel_acquire_check_succ:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: .LBB11_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r8, 0, r3
+; LE-PWR8-NEXT: xor r11, r9, r4
+; LE-PWR8-NEXT: xor r10, r8, r5
+; LE-PWR8-NEXT: or. r11, r11, r10
+; LE-PWR8-NEXT: bne cr0, .LBB11_3
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: mr r11, r6
+; LE-PWR8-NEXT: mr r10, r7
+; LE-PWR8-NEXT: stqcx. r10, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB11_1
+; LE-PWR8-NEXT: b .LBB11_4
+; LE-PWR8-NEXT: .LBB11_3: # %entry
+; LE-PWR8-NEXT: stqcx. r8, 0, r3
+; LE-PWR8-NEXT: .LBB11_4: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: xor r3, r5, r8
+; LE-PWR8-NEXT: xor r4, r4, r9
+; LE-PWR8-NEXT: or r3, r4, r3
+; LE-PWR8-NEXT: cntlzd r3, r3
+; LE-PWR8-NEXT: rldicl r3, r3, 58, 63
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: cas_acqrel_acquire_check_succ:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: mflr r0
+; AIX64-PWR8-NEXT: std r0, 16(r1)
+; AIX64-PWR8-NEXT: stdu r1, -128(r1)
+; AIX64-PWR8-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; AIX64-PWR8-NEXT: std r31, 120(r1) # 8-byte Folded Spill
+; AIX64-PWR8-NEXT: mr r31, r5
+; AIX64-PWR8-NEXT: mr r30, r4
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: nop
+; AIX64-PWR8-NEXT: xor r3, r3, r30
+; AIX64-PWR8-NEXT: xor r4, r4, r31
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: or r3, r4, r3
+; AIX64-PWR8-NEXT: ld r31, 120(r1) # 8-byte Folded Reload
+; AIX64-PWR8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; AIX64-PWR8-NEXT: cntlzd r3, r3
+; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63
+; AIX64-PWR8-NEXT: addi r1, r1, 128
+; AIX64-PWR8-NEXT: ld r0, 16(r1)
+; AIX64-PWR8-NEXT: mtlr r0
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stw r0, 4(r1)
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: lwz r11, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: li r7, 4
+; PPC-PWR8-NEXT: li r8, 2
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: stw r11, 28(r1)
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
entry:
%0 = cmpxchg i128* %a, i128 %cmp, i128 %new acq_rel acquire
%1 = extractvalue { i128, i1 } %0, 1
More information about the llvm-commits
mailing list