[llvm] 18679ac - [PowerPC] Adjust `MaxAtomicSizeInBitsSupported` on PPC64

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 8 17:03:14 PDT 2022


Author: Kai Luo
Date: 2022-04-09T00:03:09Z
New Revision: 18679ac0d7ec0fb3ccd500073827fde76bb206b0

URL: https://github.com/llvm/llvm-project/commit/18679ac0d7ec0fb3ccd500073827fde76bb206b0
DIFF: https://github.com/llvm/llvm-project/commit/18679ac0d7ec0fb3ccd500073827fde76bb206b0.diff

LOG: [PowerPC] Adjust `MaxAtomicSizeInBitsSupported` on PPC64

AtomicExpandPass uses this variable to determine whether to emit libcalls. The default value is 1024, and if we don't specify it for PPC64 explicitly, AtomicExpandPass won't emit `__atomic_*` libcalls for those targets unable to inline atomic ops, and the backend ultimately emits `__sync_*` libcalls instead. Thanks @efriedma for pointing it out.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D122868

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/test/CodeGen/PowerPC/atomics-i128.ll
    llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3a04faf3e685a..27ecece4bd607 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1322,7 +1322,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   }
 
   if (shouldInlineQuadwordAtomics()) {
-    setMaxAtomicSizeInBitsSupported(128);
     setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
     setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
     setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
@@ -1347,6 +1346,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   if (!isPPC64)
     setMaxAtomicSizeInBitsSupported(32);
+  else if (shouldInlineQuadwordAtomics())
+    setMaxAtomicSizeInBitsSupported(128);
+  else
+    setMaxAtomicSizeInBitsSupported(64);
 
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 

diff  --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
index 62b69aed56b1c..69fd3488aeb4c 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -47,10 +47,9 @@ define i128 @swap(i128* %a, i128 %x) {
 ; PWR7-NEXT:    stdu r1, -112(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_lock_test_and_set_16
+; PWR7-NEXT:    li r6, 5
+; PWR7-NEXT:    bl __atomic_exchange_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
 ; PWR7-NEXT:    addi r1, r1, 112
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
@@ -77,10 +76,9 @@ define i128 @swap(i128* %a, i128 %x) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_lock_test_and_set_16[PR]
+; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_exchange_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
@@ -140,10 +138,9 @@ define i128 @add(i128* %a, i128 %x) {
 ; PWR7-NEXT:    stdu r1, -112(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_fetch_and_add_16
+; PWR7-NEXT:    li r6, 5
+; PWR7-NEXT:    bl __atomic_fetch_add_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
 ; PWR7-NEXT:    addi r1, r1, 112
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
@@ -170,10 +167,9 @@ define i128 @add(i128* %a, i128 %x) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_fetch_and_add_16[PR]
+; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_fetch_add_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
@@ -280,10 +276,9 @@ define i128 @sub(i128* %a, i128 %x) {
 ; PWR7-NEXT:    stdu r1, -112(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_fetch_and_sub_16
+; PWR7-NEXT:    li r6, 5
+; PWR7-NEXT:    bl __atomic_fetch_sub_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
 ; PWR7-NEXT:    addi r1, r1, 112
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
@@ -310,10 +305,9 @@ define i128 @sub(i128* %a, i128 %x) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_fetch_and_sub_16[PR]
+; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_fetch_sub_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
@@ -420,10 +414,9 @@ define i128 @and(i128* %a, i128 %x) {
 ; PWR7-NEXT:    stdu r1, -112(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_fetch_and_and_16
+; PWR7-NEXT:    li r6, 5
+; PWR7-NEXT:    bl __atomic_fetch_and_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
 ; PWR7-NEXT:    addi r1, r1, 112
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
@@ -450,10 +443,9 @@ define i128 @and(i128* %a, i128 %x) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_fetch_and_and_16[PR]
+; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_fetch_and_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
@@ -560,10 +552,9 @@ define i128 @or(i128* %a, i128 %x) {
 ; PWR7-NEXT:    stdu r1, -112(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_fetch_and_or_16
+; PWR7-NEXT:    li r6, 5
+; PWR7-NEXT:    bl __atomic_fetch_or_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
 ; PWR7-NEXT:    addi r1, r1, 112
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
@@ -590,10 +581,9 @@ define i128 @or(i128* %a, i128 %x) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_fetch_and_or_16[PR]
+; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_fetch_or_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
@@ -700,10 +690,9 @@ define i128 @xor(i128* %a, i128 %x) {
 ; PWR7-NEXT:    stdu r1, -112(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_fetch_and_xor_16
+; PWR7-NEXT:    li r6, 5
+; PWR7-NEXT:    bl __atomic_fetch_xor_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
 ; PWR7-NEXT:    addi r1, r1, 112
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
@@ -730,10 +719,9 @@ define i128 @xor(i128* %a, i128 %x) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_fetch_and_xor_16[PR]
+; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_fetch_xor_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
@@ -840,10 +828,9 @@ define i128 @nand(i128* %a, i128 %x) {
 ; PWR7-NEXT:    stdu r1, -112(r1)
 ; PWR7-NEXT:    .cfi_def_cfa_offset 112
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_fetch_and_nand_16
+; PWR7-NEXT:    li r6, 5
+; PWR7-NEXT:    bl __atomic_fetch_nand_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
 ; PWR7-NEXT:    addi r1, r1, 112
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
@@ -870,10 +857,9 @@ define i128 @nand(i128* %a, i128 %x) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_fetch_and_nand_16[PR]
+; AIX64-PWR8-NEXT:    li r6, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_fetch_nand_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
 ; AIX64-PWR8-NEXT:    addi r1, r1, 112
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
@@ -986,13 +972,21 @@ define i128 @cas_weak_acquire_acquire(i128* %a, i128 %cmp, i128 %new) {
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
 ; PWR7-NEXT:    std r0, 16(r1)
-; PWR7-NEXT:    stdu r1, -112(r1)
-; PWR7-NEXT:    .cfi_def_cfa_offset 112
+; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
+; PWR7-NEXT:    std r5, 120(r1)
+; PWR7-NEXT:    std r4, 112(r1)
+; PWR7-NEXT:    addi r4, r1, 112
+; PWR7-NEXT:    mr r5, r6
+; PWR7-NEXT:    mr r6, r7
+; PWR7-NEXT:    li r7, 2
+; PWR7-NEXT:    li r8, 2
+; PWR7-NEXT:    bl __atomic_compare_exchange_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
-; PWR7-NEXT:    addi r1, r1, 112
+; PWR7-NEXT:    ld r4, 120(r1)
+; PWR7-NEXT:    ld r3, 112(r1)
+; PWR7-NEXT:    addi r1, r1, 128
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
 ; PWR7-NEXT:    blr
@@ -1025,11 +1019,19 @@ define i128 @cas_weak_acquire_acquire(i128* %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
-; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r5, 120(r1)
+; AIX64-PWR8-NEXT:    std r4, 112(r1)
+; AIX64-PWR8-NEXT:    addi r4, r1, 112
+; AIX64-PWR8-NEXT:    mr r5, r6
+; AIX64-PWR8-NEXT:    mr r6, r7
+; AIX64-PWR8-NEXT:    li r7, 2
+; AIX64-PWR8-NEXT:    li r8, 2
+; AIX64-PWR8-NEXT:    bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
-; AIX64-PWR8-NEXT:    addi r1, r1, 112
+; AIX64-PWR8-NEXT:    ld r4, 120(r1)
+; AIX64-PWR8-NEXT:    ld r3, 112(r1)
+; AIX64-PWR8-NEXT:    addi r1, r1, 128
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
 ; AIX64-PWR8-NEXT:    blr
@@ -1101,13 +1103,21 @@ define i128 @cas_weak_release_monotonic(i128* %a, i128 %cmp, i128 %new) {
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
 ; PWR7-NEXT:    std r0, 16(r1)
-; PWR7-NEXT:    stdu r1, -112(r1)
-; PWR7-NEXT:    .cfi_def_cfa_offset 112
+; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    lwsync
-; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
+; PWR7-NEXT:    std r5, 120(r1)
+; PWR7-NEXT:    std r4, 112(r1)
+; PWR7-NEXT:    addi r4, r1, 112
+; PWR7-NEXT:    mr r5, r6
+; PWR7-NEXT:    mr r6, r7
+; PWR7-NEXT:    li r7, 3
+; PWR7-NEXT:    li r8, 0
+; PWR7-NEXT:    bl __atomic_compare_exchange_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    addi r1, r1, 112
+; PWR7-NEXT:    ld r4, 120(r1)
+; PWR7-NEXT:    ld r3, 112(r1)
+; PWR7-NEXT:    addi r1, r1, 128
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
 ; PWR7-NEXT:    blr
@@ -1140,11 +1150,19 @@ define i128 @cas_weak_release_monotonic(i128* %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
-; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    lwsync
-; AIX64-PWR8-NEXT:    bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r5, 120(r1)
+; AIX64-PWR8-NEXT:    std r4, 112(r1)
+; AIX64-PWR8-NEXT:    addi r4, r1, 112
+; AIX64-PWR8-NEXT:    mr r5, r6
+; AIX64-PWR8-NEXT:    mr r6, r7
+; AIX64-PWR8-NEXT:    li r7, 3
+; AIX64-PWR8-NEXT:    li r8, 0
+; AIX64-PWR8-NEXT:    bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    addi r1, r1, 112
+; AIX64-PWR8-NEXT:    ld r4, 120(r1)
+; AIX64-PWR8-NEXT:    ld r3, 112(r1)
+; AIX64-PWR8-NEXT:    addi r1, r1, 128
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
 ; AIX64-PWR8-NEXT:    blr
@@ -1217,14 +1235,21 @@ define i128 @cas_sc_sc(i128* %a, i128 %cmp, i128 %new) {
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
 ; PWR7-NEXT:    std r0, 16(r1)
-; PWR7-NEXT:    stdu r1, -112(r1)
-; PWR7-NEXT:    .cfi_def_cfa_offset 112
+; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    sync
-; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
+; PWR7-NEXT:    std r5, 120(r1)
+; PWR7-NEXT:    std r4, 112(r1)
+; PWR7-NEXT:    addi r4, r1, 112
+; PWR7-NEXT:    mr r5, r6
+; PWR7-NEXT:    mr r6, r7
+; PWR7-NEXT:    li r7, 5
+; PWR7-NEXT:    li r8, 5
+; PWR7-NEXT:    bl __atomic_compare_exchange_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
-; PWR7-NEXT:    addi r1, r1, 112
+; PWR7-NEXT:    ld r4, 120(r1)
+; PWR7-NEXT:    ld r3, 112(r1)
+; PWR7-NEXT:    addi r1, r1, 128
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
 ; PWR7-NEXT:    blr
@@ -1258,12 +1283,19 @@ define i128 @cas_sc_sc(i128* %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
-; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    sync
-; AIX64-PWR8-NEXT:    bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r5, 120(r1)
+; AIX64-PWR8-NEXT:    std r4, 112(r1)
+; AIX64-PWR8-NEXT:    addi r4, r1, 112
+; AIX64-PWR8-NEXT:    mr r5, r6
+; AIX64-PWR8-NEXT:    mr r6, r7
+; AIX64-PWR8-NEXT:    li r7, 5
+; AIX64-PWR8-NEXT:    li r8, 5
+; AIX64-PWR8-NEXT:    bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
-; AIX64-PWR8-NEXT:    addi r1, r1, 112
+; AIX64-PWR8-NEXT:    ld r4, 120(r1)
+; AIX64-PWR8-NEXT:    ld r3, 112(r1)
+; AIX64-PWR8-NEXT:    addi r1, r1, 128
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
 ; AIX64-PWR8-NEXT:    blr
@@ -1336,14 +1368,21 @@ define i128 @cas_acqrel_acquire(i128* %a, i128 %cmp, i128 %new) {
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
 ; PWR7-NEXT:    std r0, 16(r1)
-; PWR7-NEXT:    stdu r1, -112(r1)
-; PWR7-NEXT:    .cfi_def_cfa_offset 112
+; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    lwsync
-; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
+; PWR7-NEXT:    std r5, 120(r1)
+; PWR7-NEXT:    std r4, 112(r1)
+; PWR7-NEXT:    addi r4, r1, 112
+; PWR7-NEXT:    mr r5, r6
+; PWR7-NEXT:    mr r6, r7
+; PWR7-NEXT:    li r7, 4
+; PWR7-NEXT:    li r8, 2
+; PWR7-NEXT:    bl __atomic_compare_exchange_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    lwsync
-; PWR7-NEXT:    addi r1, r1, 112
+; PWR7-NEXT:    ld r4, 120(r1)
+; PWR7-NEXT:    ld r3, 112(r1)
+; PWR7-NEXT:    addi r1, r1, 128
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
 ; PWR7-NEXT:    blr
@@ -1377,12 +1416,19 @@ define i128 @cas_acqrel_acquire(i128* %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8:       # %bb.0: # %entry
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
-; AIX64-PWR8-NEXT:    stdu r1, -112(r1)
-; AIX64-PWR8-NEXT:    lwsync
-; AIX64-PWR8-NEXT:    bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
+; AIX64-PWR8-NEXT:    std r5, 120(r1)
+; AIX64-PWR8-NEXT:    std r4, 112(r1)
+; AIX64-PWR8-NEXT:    addi r4, r1, 112
+; AIX64-PWR8-NEXT:    mr r5, r6
+; AIX64-PWR8-NEXT:    mr r6, r7
+; AIX64-PWR8-NEXT:    li r7, 4
+; AIX64-PWR8-NEXT:    li r8, 2
+; AIX64-PWR8-NEXT:    bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    lwsync
-; AIX64-PWR8-NEXT:    addi r1, r1, 112
+; AIX64-PWR8-NEXT:    ld r4, 120(r1)
+; AIX64-PWR8-NEXT:    ld r3, 112(r1)
+; AIX64-PWR8-NEXT:    addi r1, r1, 128
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0
 ; AIX64-PWR8-NEXT:    blr
@@ -1458,27 +1504,19 @@ define i1 @cas_acqrel_acquire_check_succ(i128* %a, i128 %cmp, i128 %new) {
 ; PWR7:       # %bb.0: # %entry
 ; PWR7-NEXT:    mflr r0
 ; PWR7-NEXT:    std r0, 16(r1)
-; PWR7-NEXT:    stdu r1, -144(r1)
-; PWR7-NEXT:    .cfi_def_cfa_offset 144
+; PWR7-NEXT:    stdu r1, -128(r1)
+; PWR7-NEXT:    .cfi_def_cfa_offset 128
 ; PWR7-NEXT:    .cfi_offset lr, 16
-; PWR7-NEXT:    .cfi_offset r29, -24
-; PWR7-NEXT:    .cfi_offset r30, -16
-; PWR7-NEXT:    std r29, 120(r1) # 8-byte Folded Spill
-; PWR7-NEXT:    std r30, 128(r1) # 8-byte Folded Spill
-; PWR7-NEXT:    mr r30, r5
-; PWR7-NEXT:    mr r29, r4
-; PWR7-NEXT:    lwsync
-; PWR7-NEXT:    bl __sync_val_compare_and_swap_16
+; PWR7-NEXT:    std r5, 120(r1)
+; PWR7-NEXT:    std r4, 112(r1)
+; PWR7-NEXT:    addi r4, r1, 112
+; PWR7-NEXT:    mr r5, r6
+; PWR7-NEXT:    mr r6, r7
+; PWR7-NEXT:    li r7, 4
+; PWR7-NEXT:    li r8, 2
+; PWR7-NEXT:    bl __atomic_compare_exchange_16
 ; PWR7-NEXT:    nop
-; PWR7-NEXT:    xor r3, r3, r29
-; PWR7-NEXT:    xor r4, r4, r30
-; PWR7-NEXT:    lwsync
-; PWR7-NEXT:    or r3, r4, r3
-; PWR7-NEXT:    ld r30, 128(r1) # 8-byte Folded Reload
-; PWR7-NEXT:    ld r29, 120(r1) # 8-byte Folded Reload
-; PWR7-NEXT:    cntlzd r3, r3
-; PWR7-NEXT:    rldicl r3, r3, 58, 63
-; PWR7-NEXT:    addi r1, r1, 144
+; PWR7-NEXT:    addi r1, r1, 128
 ; PWR7-NEXT:    ld r0, 16(r1)
 ; PWR7-NEXT:    mtlr r0
 ; PWR7-NEXT:    blr
@@ -1516,21 +1554,15 @@ define i1 @cas_acqrel_acquire_check_succ(i128* %a, i128 %cmp, i128 %new) {
 ; AIX64-PWR8-NEXT:    mflr r0
 ; AIX64-PWR8-NEXT:    std r0, 16(r1)
 ; AIX64-PWR8-NEXT:    stdu r1, -128(r1)
-; AIX64-PWR8-NEXT:    std r30, 112(r1) # 8-byte Folded Spill
-; AIX64-PWR8-NEXT:    std r31, 120(r1) # 8-byte Folded Spill
-; AIX64-PWR8-NEXT:    mr r31, r5
-; AIX64-PWR8-NEXT:    mr r30, r4
-; AIX64-PWR8-NEXT:    lwsync
-; AIX64-PWR8-NEXT:    bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT:    std r5, 120(r1)
+; AIX64-PWR8-NEXT:    std r4, 112(r1)
+; AIX64-PWR8-NEXT:    addi r4, r1, 112
+; AIX64-PWR8-NEXT:    mr r5, r6
+; AIX64-PWR8-NEXT:    mr r6, r7
+; AIX64-PWR8-NEXT:    li r7, 4
+; AIX64-PWR8-NEXT:    li r8, 2
+; AIX64-PWR8-NEXT:    bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT:    nop
-; AIX64-PWR8-NEXT:    xor r3, r3, r30
-; AIX64-PWR8-NEXT:    xor r4, r4, r31
-; AIX64-PWR8-NEXT:    lwsync
-; AIX64-PWR8-NEXT:    or r3, r4, r3
-; AIX64-PWR8-NEXT:    ld r31, 120(r1) # 8-byte Folded Reload
-; AIX64-PWR8-NEXT:    ld r30, 112(r1) # 8-byte Folded Reload
-; AIX64-PWR8-NEXT:    cntlzd r3, r3
-; AIX64-PWR8-NEXT:    rldicl r3, r3, 58, 63
 ; AIX64-PWR8-NEXT:    addi r1, r1, 128
 ; AIX64-PWR8-NEXT:    ld r0, 16(r1)
 ; AIX64-PWR8-NEXT:    mtlr r0

diff  --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
index e4dcc920b67c7..3e44f5ccbf37b 100644
--- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
+++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
@@ -34,10 +34,22 @@ define i1 @test_cmpxchg_seq_cst(i128* %addr, i128 %desire, i128 %new) {
 ;
 ; PWR7-LABEL: @test_cmpxchg_seq_cst(
 ; PWR7-NEXT:  entry:
-; PWR7-NEXT:    call void @llvm.ppc.sync()
-; PWR7-NEXT:    [[PAIR:%.*]] = cmpxchg weak i128* [[ADDR:%.*]], i128 [[DESIRE:%.*]], i128 [[NEW:%.*]] monotonic monotonic, align 16
-; PWR7-NEXT:    call void @llvm.ppc.lwsync()
-; PWR7-NEXT:    [[SUCC:%.*]] = extractvalue { i128, i1 } [[PAIR]], 1
+; PWR7-NEXT:    [[TMP0:%.*]] = bitcast i128* [[ADDR:%.*]] to i8*
+; PWR7-NEXT:    [[TMP1:%.*]] = alloca i128, align 8
+; PWR7-NEXT:    [[TMP2:%.*]] = bitcast i128* [[TMP1]] to i8*
+; PWR7-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP2]])
+; PWR7-NEXT:    store i128 [[DESIRE:%.*]], i128* [[TMP1]], align 8
+; PWR7-NEXT:    [[TMP3:%.*]] = alloca i128, align 8
+; PWR7-NEXT:    [[TMP4:%.*]] = bitcast i128* [[TMP3]] to i8*
+; PWR7-NEXT:    call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP4]])
+; PWR7-NEXT:    store i128 [[NEW:%.*]], i128* [[TMP3]], align 8
+; PWR7-NEXT:    [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* [[TMP0]], i8* [[TMP2]], i8* [[TMP4]], i32 5, i32 5)
+; PWR7-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP4]])
+; PWR7-NEXT:    [[TMP6:%.*]] = load i128, i128* [[TMP1]], align 8
+; PWR7-NEXT:    call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP2]])
+; PWR7-NEXT:    [[TMP7:%.*]] = insertvalue { i128, i1 } undef, i128 [[TMP6]], 0
+; PWR7-NEXT:    [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; PWR7-NEXT:    [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
 ; PWR7-NEXT:    ret i1 [[SUCC]]
 ;
 entry:


        


More information about the llvm-commits mailing list