[llvm] [LICM] Improve LICM when calls only change Inaccessible memory (PR #169379)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 04:18:38 PDT 2026


https://github.com/CarolineConcatto updated https://github.com/llvm/llvm-project/pull/169379

>From dba62f14ec1cf9617bc3c2e9ec06fbe982b61010 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Fri, 31 Oct 2025 16:26:46 +0000
Subject: [PATCH 1/2] [Draft][LICM] Modify noConflictingReadWrites behaviour
 for calls

Change the behaviour of noConflictingReadWrites for calls: it
allows hoisting calls out of the loop when there is no conflict
with other memory Uses in the loop.

This patch implements two changes for memory accesses that are Uses:

1) Alias-aware handling of call Uses:
Check whether the memory Uses in the loop read the same memory locations
as 'I' (the call that can be hoisted) modifies. If not, 'I' can be hoisted.
There is a positive test called loop_alias that exercises these changes.

2) Only consider Uses occurring before 'I' (the call that can be hoisted):
only check clobbering for memory Uses in the loop that occur before 'I'.
This change is tested with hoist_until_I.
---
 llvm/lib/Transforms/Scalar/LICM.cpp           |  25 +++-
 llvm/test/Transforms/LICM/call-hoisting.ll    |  21 +--
 .../LICM/hoist-inaccesiblemem-call.ll         | 132 ++++++++++++++++++
 3 files changed, 163 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 8ac1cb6a899ba..0203682e4e0ee 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2317,6 +2317,15 @@ static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
   auto *IMD = MSSA->getMemoryAccess(I);
   BatchAAResults BAA(*AA);
   auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
+  auto *CallI = dyn_cast<CallBase>(I);
+  auto doesItReadILoc = [&](Instruction *UserI) -> bool {
+    // Blocks if call reads the location I.
+    if (auto *OtherCB = dyn_cast_or_null<CallBase>(UserI)) {
+      ModRefInfo MRI = AA->getModRefInfo(CallI, OtherCB);
+      return isRefSet(MRI);
+    }
+    return true;
+  };
   // Make sure there are no clobbers inside the loop.
   if (!MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()))
     return false;
@@ -2330,18 +2339,22 @@ static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
     auto *Accesses = MSSA->getBlockAccesses(BB);
     if (!Accesses)
       continue;
+    bool FoundI = false;
     for (const auto &MA : *Accesses)
       if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
         auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
                                              const_cast<MemoryUse *>(MU));
-        if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()))
+        // Check clobbering only for Uses that happens before I
+        if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()) &&
+            !FoundI)
           return false;
         // Disable hoisting past potentially interfering loads. Optimized
         // Uses may point to an access outside the loop, as getClobbering
         // checks the previous iteration when walking the backedge.
-        // FIXME: More precise: no Uses that alias I.
-        if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU))
-          return false;
+        if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU)) {
+          auto *UserI = MU->getMemoryInst();
+          return !doesItReadILoc(UserI);
+        }
       } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
         if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
           (void)LI; // Silence warning.
@@ -2361,8 +2374,10 @@ static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
             auto *SCI = cast<CallInst>(I);
             // If the instruction we are wanting to hoist is also a call
             // instruction then we need not check mod/ref info with itself
-            if (SCI == CI)
+            if (SCI == CI) {
+              FoundI = true;
               continue;
+            }
             ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
             if (isModOrRefSet(MRI))
               return false;
diff --git a/llvm/test/Transforms/LICM/call-hoisting.ll b/llvm/test/Transforms/LICM/call-hoisting.ll
index aa8c8bbed550e..44c8eb174b948 100644
--- a/llvm/test/Transforms/LICM/call-hoisting.ll
+++ b/llvm/test/Transforms/LICM/call-hoisting.ll
@@ -367,18 +367,18 @@ exit:
 define void @neg_ref(ptr %loc) {
 ; CHECK-LABEL: define void @neg_ref(
 ; CHECK-SAME: ptr [[LOC:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[BACKEDGE:.*]] ]
+; CHECK-NEXT:  [[LOOP:.*]]:
 ; CHECK-NEXT:    call void @store(i32 0, ptr [[LOC]])
 ; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[LOC]], align 4
 ; CHECK-NEXT:    [[EARLYCND:%.*]] = icmp eq i32 [[V]], 198
+; CHECK-NEXT:    br label %[[LOOP1:.*]]
+; CHECK:       [[LOOP1]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[LOOP]] ], [ [[IV_NEXT:%.*]], %[[BACKEDGE:.*]] ]
 ; CHECK-NEXT:    br i1 [[EARLYCND]], label %[[EXIT1:.*]], label %[[BACKEDGE]]
 ; CHECK:       [[BACKEDGE]]:
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 200
-; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT2:.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP1]], label %[[EXIT2:.*]]
 ; CHECK:       [[EXIT1]]:
 ; CHECK-NEXT:    ret void
 ; CHECK:       [[EXIT2]]:
@@ -504,15 +504,15 @@ exit:
 define void @neg_not_argmemonly(ptr %loc, ptr %loc2) {
 ; CHECK-LABEL: define void @neg_not_argmemonly(
 ; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*]]:
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:  [[LOOP:.*]]:
 ; CHECK-NEXT:    call void @not_argmemonly(i32 0, ptr [[LOC]])
 ; CHECK-NEXT:    call void @load(i32 0, ptr [[LOC2]])
+; CHECK-NEXT:    br label %[[LOOP1:.*]]
+; CHECK:       [[LOOP1]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[LOOP]] ], [ [[IV_NEXT:%.*]], %[[LOOP1]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[IV]], 200
-; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP1]], label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
@@ -588,3 +588,4 @@ exit:
   ret void
 }
 
+
diff --git a/llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll b/llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll
new file mode 100644
index 0000000000000..d3ff60ef6ba1d
--- /dev/null
+++ b/llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<target-ir>,loop-mssa(licm)' < %s -S | FileCheck %s
+
+
+define dso_local i32 @loop_alias(i32 %x, ptr %a, ptr %b)  #0{
+; CHECK-LABEL: define dso_local i32 @loop_alias(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[VAL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 1
+; CHECK-NEXT:    call void @fn_write_inaccessible_mem()
+; CHECK-NEXT:    call void @fn_read_inaccessible_mem()
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[GEP:%.*]], %[[LOOP]] ], [ [[VAL]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr [[PHI]], i64 0
+; CHECK-NEXT:    [[VAL2:%.*]] = call i32 @fn_args(ptr [[GEP2]])
+; CHECK-NEXT:    [[GEP]] = getelementptr inbounds nuw i32, ptr [[PHI]], i64 0
+; CHECK-NEXT:    [[ACC:%.*]] = add nuw nsw i32 [[VAL2]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ACC]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[AFTER_LOOP:.*]]
+; CHECK:       [[AFTER_LOOP]]:
+; CHECK-NEXT:    [[ACC_LCSSA:%.*]] = phi i32 [ [[ACC]], %[[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[ACC_LCSSA]]
+;
+entry:
+  %val = getelementptr inbounds nuw i32, ptr %a, i64 1
+  br label %loop
+loop:
+  %phi = phi ptr [ %gep, %loop ], [ %val, %entry ]
+  %44 = load i32, ptr %phi, align 16
+  %gep2 = getelementptr i8, ptr %phi, i64 0
+  %val2 = call i32 @fn_args(ptr  %gep2)
+  call void @fn_write_inaccessible_mem()
+  call void @fn_read_inaccessible_mem()
+  %gep = getelementptr inbounds nuw i32, ptr %phi, i64 0
+  %acc = add nuw nsw i32 %val2, 1
+  %cmp = icmp ult i32 %acc, 10
+  br i1 %cmp, label %loop, label %after_loop
+after_loop:
+  ret i32 %acc
+}
+
+declare i32  @fn_args(ptr) nounwind willreturn
+memory(argmem: read)
+
+define void @hoist_untill_I(ptr noalias %loc, ptr noalias %loc2){
+; CHECK-LABEL: define void @hoist_untill_I(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[LOC2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[LOC2]], align 4
+; CHECK-NEXT:    call void @fn_write_inaccessible_mem()
+; CHECK-NEXT:    call void @fn_read_inaccessible_mem()
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP:.*:]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[LOC]], align 4
+; CHECK-NEXT:    br label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+for.body:
+  %val = load i32, ptr %loc2
+  store i32 %val, ptr %loc
+  call void @fn_write_inaccessible_mem()
+  call void @fn_read_inaccessible_mem()
+  br label %for.body
+}
+
+
+define void @neg_hoist_untill_I(ptr noalias %loc, ptr noalias %loc2){
+; CHECK-LABEL: define void @neg_hoist_untill_I(
+; CHECK-SAME: ptr noalias [[LOC:%.*]], ptr noalias [[LOC2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[LOC2]], align 4
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[LOC]], align 4
+; CHECK-NEXT:    call void @fn_read_inaccessible_mem()
+; CHECK-NEXT:    call void @fn_write_inaccessible_mem()
+; CHECK-NEXT:    call void @fn_read_inaccessible_mem()
+; CHECK-NEXT:    br label %[[FOR_BODY]]
+;
+entry:
+  br label %for.body
+for.body:
+  %val = load i32, ptr %loc2
+  store i32 %val, ptr %loc
+  call void @fn_read_inaccessible_mem()
+  call void @fn_write_inaccessible_mem()
+  call void @fn_read_inaccessible_mem()
+  br label %for.body
+}
+
+
+; Nothing should be hoisted from the loop because volatile
+; sets inaccessible memory to read write
+define void @neg_volatile(ptr %loc, ptr %loc2) {
+; CHECK-LABEL: define void @neg_volatile(
+; CHECK-SAME: ptr [[LOC:%.*]], ptr [[LOC2:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    store volatile i32 0, ptr [[LOC]], align 4
+; CHECK-NEXT:    call void @fn_write_inaccessible_mem()
+; CHECK-NEXT:    call void @fn_read_inaccessible_mem()
+; CHECK-NEXT:    br label %[[LOOP]]
+;
+entry:
+  br label %loop
+
+loop:
+  %val = load i32, ptr %loc2
+  store volatile i32 0, ptr %loc
+  call void @fn_write_inaccessible_mem()
+  call void @fn_read_inaccessible_mem()
+  br label %loop
+}
+
+declare void @fn_write_inaccessible_mem()#0
+  memory(inaccessiblemem:  write)
+
+declare void @fn_read_inaccessible_mem()#0
+  memory(inaccessiblemem: read)
+
+declare void @fn_readwrite_inaccessible_mem()#0
+  memory(inaccessiblemem: readwrite)
+
+; Needs to set nounwind because of doesNotThrow
+attributes #0 = { mustprogress nofree norecurse nosync nounwind}

>From 394e5dd6268692f6adae9e026f443aaff062a0c7 Mon Sep 17 00:00:00 2001
From: CarolineConcatto <caroline.concatto at arm.com>
Date: Tue, 3 Mar 2026 18:21:33 +0000
Subject: [PATCH 2/2] Address review comments

---
 llvm/lib/Transforms/Scalar/LICM.cpp           |  28 +-
 llvm/test/Analysis/MemorySSA/pr43438.ll       |   6 +-
 .../CodeGen/PowerPC/PR35812-neg-cmpxchg.ll    |  16 +-
 llvm/test/CodeGen/PowerPC/all-atomics.ll      | 262 ++++--
 .../CodeGen/PowerPC/atomics-regression.ll     | 872 +++++++++++-------
 llvm/test/CodeGen/PowerPC/atomics.ll          |  12 +-
 .../LICM/hoist-inaccesiblemem-call.ll         |   2 +-
 llvm/test/Transforms/LICM/pr50367.ll          |   4 +-
 llvm/test/Transforms/LICM/store-hoisting.ll   |  13 +-
 9 files changed, 766 insertions(+), 449 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 0203682e4e0ee..ece42c5ef1e8a 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2317,7 +2317,7 @@ static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
   auto *IMD = MSSA->getMemoryAccess(I);
   BatchAAResults BAA(*AA);
   auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, IMD);
-  auto *CallI = dyn_cast<CallBase>(I);
+  auto *CallI = dyn_cast_or_null<CallBase>(I);
   auto doesItReadILoc = [&](Instruction *UserI) -> bool {
     // Blocks if call reads the location I.
     if (auto *OtherCB = dyn_cast_or_null<CallBase>(UserI)) {
@@ -2339,21 +2339,31 @@ static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
     auto *Accesses = MSSA->getBlockAccesses(BB);
     if (!Accesses)
       continue;
-    bool FoundI = false;
     for (const auto &MA : *Accesses)
       if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
         auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
                                              const_cast<MemoryUse *>(MU));
-        // Check clobbering only for Uses that happens before I
-        if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock()) &&
-            !FoundI)
-          return false;
+
+        if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock())) {
+          auto *MDI = dyn_cast_or_null<MemoryDef>(MD);
+          // It checks only if I is clobbering.
+          // If MDI is not the same as I (that it wants to hoist), assumes they
+          // clobber
+          if (!MDI || MDI->getMemoryInst() != I)
+            return false;
+        }
         // Disable hoisting past potentially interfering loads. Optimized
         // Uses may point to an access outside the loop, as getClobbering
         // checks the previous iteration when walking the backedge.
         if (!Flags.getIsSink() && !MSSA->dominates(IMD, MU)) {
           auto *UserI = MU->getMemoryInst();
-          return !doesItReadILoc(UserI);
+          // Calls with different memory definitions can be optimized
+          // by checking if ModRefInfo does not conflict
+          if (CallI && UserI) {
+            if (doesItReadILoc(UserI))
+              return false;
+          } else
+            return false;
         }
       } else if (const auto *MD = dyn_cast<MemoryDef>(&MA)) {
         if (auto *LI = dyn_cast<LoadInst>(MD->getMemoryInst())) {
@@ -2374,10 +2384,8 @@ static bool noConflictingReadWrites(Instruction *I, MemorySSA *MSSA,
             auto *SCI = cast<CallInst>(I);
             // If the instruction we are wanting to hoist is also a call
             // instruction then we need not check mod/ref info with itself
-            if (SCI == CI) {
-              FoundI = true;
+            if (SCI == CI)
               continue;
-            }
             ModRefInfo MRI = BAA.getModRefInfo(CI, SCI);
             if (isModOrRefSet(MRI))
               return false;
diff --git a/llvm/test/Analysis/MemorySSA/pr43438.ll b/llvm/test/Analysis/MemorySSA/pr43438.ll
index 0e09137c1cf9a..c2a8d68930e0d 100644
--- a/llvm/test/Analysis/MemorySSA/pr43438.ll
+++ b/llvm/test/Analysis/MemorySSA/pr43438.ll
@@ -46,11 +46,7 @@ if.end569:                                        ; preds = %if.else568, %if.the
 
 
 ; CHECK-LABEL: @f(i1 %arg)
-; CHECK: 7 = MemoryPhi(
-; CHECK: 6 = MemoryPhi(
-; CHECK: 10 = MemoryPhi(
-; CHECK: 9 = MemoryPhi(
-; CHECK: 8 = MemoryPhi(
+; CHECK: 4 = MemoryPhi(
 define void @f(i1 %arg) {
 entry:
   %e = alloca i16, align 1
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index 2d8e0e869a860..a6b9f22506675 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -21,17 +21,21 @@ define signext i32 @main() nounwind {
 ; CHECK-NEXT:    cmplwi 4, 33059
 ; CHECK-NEXT:    bne- 0, .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; CHECK-NEXT:    sync
 ; CHECK-NEXT:    li 4, 234
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    sthcx. 4, 0, 3
+; CHECK-NEXT:    mfocrf 4, 128
+; CHECK-NEXT:    srwi 4, 4, 28
+; CHECK-NEXT:    rlwinm 4, 4, 31, 31, 31
+; CHECK-NEXT:    cmpwi 4, 1
 ; CHECK-NEXT:  .LBB0_2: # %cmpxchg.trystore
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 4, 0, 3
-; CHECK-NEXT:    beq+ 0, .LBB0_5
+; CHECK-NEXT:    bc 12, 2, .LBB0_5
 ; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lharx 5, 0, 3
-; CHECK-NEXT:    cmplwi 5, 33059
-; CHECK-NEXT:    beq+ 0, .LBB0_2
+; CHECK-NEXT:    lharx 4, 0, 3
+; CHECK-NEXT:    cmplwi 1, 4, 33059
+; CHECK-NEXT:    beq+ 1, .LBB0_2
 ; CHECK-NEXT:  .LBB0_4: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 93968b7153bf8..bf1e4cde09d0e 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4350,15 +4350,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne- 0, .LBB3_4
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore276
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stbcx. 8, 0, 6
+; CHECK-NEXT:    mfocrf 8, 128
+; CHECK-NEXT:    srwi 8, 8, 28
+; CHECK-NEXT:    rlwinm 8, 8, 31, 31, 31
+; CHECK-NEXT:    cmpwi 8, 1
 ; CHECK-NEXT:  .LBB3_2: # %cmpxchg.trystore275
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stbcx. 8, 0, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_4
+; CHECK-NEXT:    bc 12, 2, .LBB3_4
 ; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload274
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 5, 0, 6
-; CHECK-NEXT:    cmplw 5, 7
-; CHECK-NEXT:    beq+ 0, .LBB3_2
+; CHECK-NEXT:    cmplw 1, 5, 7
+; CHECK-NEXT:    beq+ 1, .LBB3_2
 ; CHECK-NEXT:  .LBB3_4: # %cmpxchg.nostore272
 ; CHECK-NEXT:    addi 7, 3, uc at toc@l
 ; CHECK-NEXT:    lwsync
@@ -4368,17 +4372,21 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    cmplw 8, 9
 ; CHECK-NEXT:    bne- 0, .LBB3_8
 ; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore257
-; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 5, 5, 24
+; CHECK-NEXT:    sync
+; CHECK-NEXT:    stbcx. 5, 0, 7
+; CHECK-NEXT:    mfocrf 5, 128
+; CHECK-NEXT:    srwi 5, 5, 28
+; CHECK-NEXT:    rlwinm 5, 5, 31, 31, 31
+; CHECK-NEXT:    cmpwi 5, 1
 ; CHECK-NEXT:  .LBB3_6: # %cmpxchg.trystore256
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stbcx. 5, 0, 7
-; CHECK-NEXT:    beq+ 0, .LBB3_8
+; CHECK-NEXT:    bc 12, 2, .LBB3_8
 ; CHECK-NEXT:  # %bb.7: # %cmpxchg.releasedload255
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 8, 0, 7
-; CHECK-NEXT:    cmplw 8, 9
-; CHECK-NEXT:    beq+ 0, .LBB3_6
+; CHECK-NEXT:    cmplw 1, 8, 9
+; CHECK-NEXT:    beq+ 1, .LBB3_6
 ; CHECK-NEXT:  .LBB3_8: # %cmpxchg.nostore253
 ; CHECK-NEXT:    addis 5, 2, ss at toc@ha
 ; CHECK-NEXT:    lwsync
@@ -4393,15 +4401,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    extsb 11, 11
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 11, 11, 16
+; CHECK-NEXT:    sthcx. 11, 0, 8
+; CHECK-NEXT:    mfocrf 11, 128
+; CHECK-NEXT:    srwi 11, 11, 28
+; CHECK-NEXT:    rlwinm 11, 11, 31, 31, 31
+; CHECK-NEXT:    cmpwi 11, 1
 ; CHECK-NEXT:  .LBB3_10: # %cmpxchg.trystore237
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 11, 0, 8
-; CHECK-NEXT:    beq+ 0, .LBB3_12
+; CHECK-NEXT:    bc 12, 2, .LBB3_12
 ; CHECK-NEXT:  # %bb.11: # %cmpxchg.releasedload236
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 9, 0, 8
-; CHECK-NEXT:    cmplw 9, 10
-; CHECK-NEXT:    beq+ 0, .LBB3_10
+; CHECK-NEXT:    cmplw 1, 9, 10
+; CHECK-NEXT:    beq+ 1, .LBB3_10
 ; CHECK-NEXT:  .LBB3_12: # %cmpxchg.nostore234
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 9, ss at toc@l(5)
@@ -4416,15 +4428,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    extsb 12, 12
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 12, 12, 16
+; CHECK-NEXT:    sthcx. 12, 0, 9
+; CHECK-NEXT:    mfocrf 12, 128
+; CHECK-NEXT:    srwi 12, 12, 28
+; CHECK-NEXT:    rlwinm 12, 12, 31, 31, 31
+; CHECK-NEXT:    cmpwi 12, 1
 ; CHECK-NEXT:  .LBB3_14: # %cmpxchg.trystore218
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 12, 0, 9
-; CHECK-NEXT:    beq+ 0, .LBB3_16
+; CHECK-NEXT:    bc 12, 2, .LBB3_16
 ; CHECK-NEXT:  # %bb.15: # %cmpxchg.releasedload217
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 10, 0, 9
-; CHECK-NEXT:    cmplw 10, 11
-; CHECK-NEXT:    beq+ 0, .LBB3_14
+; CHECK-NEXT:    cmplw 1, 10, 11
+; CHECK-NEXT:    beq+ 1, .LBB3_14
 ; CHECK-NEXT:  .LBB3_16: # %cmpxchg.nostore215
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 10, us at toc@l(5)
@@ -4438,15 +4454,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.17: # %cmpxchg.fencedstore200
 ; CHECK-NEXT:    extsb 0, 0
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stwcx. 0, 0, 10
+; CHECK-NEXT:    mfocrf 0, 128
+; CHECK-NEXT:    srwi 0, 0, 28
+; CHECK-NEXT:    rlwinm 0, 0, 31, 31, 31
+; CHECK-NEXT:    cmpwi 0, 1
 ; CHECK-NEXT:  .LBB3_18: # %cmpxchg.trystore199
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stwcx. 0, 0, 10
-; CHECK-NEXT:    beq+ 0, .LBB3_20
+; CHECK-NEXT:    bc 12, 2, .LBB3_20
 ; CHECK-NEXT:  # %bb.19: # %cmpxchg.releasedload198
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 11, 0, 10
-; CHECK-NEXT:    cmplw 11, 12
-; CHECK-NEXT:    beq+ 0, .LBB3_18
+; CHECK-NEXT:    cmplw 1, 11, 12
+; CHECK-NEXT:    beq+ 1, .LBB3_18
 ; CHECK-NEXT:  .LBB3_20: # %cmpxchg.nostore196
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 11, si at toc@l(5)
@@ -4460,15 +4480,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.21: # %cmpxchg.fencedstore181
 ; CHECK-NEXT:    extsb 30, 30
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stwcx. 30, 0, 11
+; CHECK-NEXT:    mfocrf 30, 128
+; CHECK-NEXT:    srwi 30, 30, 28
+; CHECK-NEXT:    rlwinm 30, 30, 31, 31, 31
+; CHECK-NEXT:    cmpwi 30, 1
 ; CHECK-NEXT:  .LBB3_22: # %cmpxchg.trystore180
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stwcx. 30, 0, 11
-; CHECK-NEXT:    beq+ 0, .LBB3_24
+; CHECK-NEXT:    bc 12, 2, .LBB3_24
 ; CHECK-NEXT:  # %bb.23: # %cmpxchg.releasedload179
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 12, 0, 11
-; CHECK-NEXT:    cmplw 12, 0
-; CHECK-NEXT:    beq+ 0, .LBB3_22
+; CHECK-NEXT:    cmplw 1, 12, 0
+; CHECK-NEXT:    beq+ 1, .LBB3_22
 ; CHECK-NEXT:  .LBB3_24: # %cmpxchg.nostore177
 ; CHECK-NEXT:    addis 30, 2, sll at toc@ha
 ; CHECK-NEXT:    lwsync
@@ -4482,15 +4506,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.25: # %cmpxchg.fencedstore162
 ; CHECK-NEXT:    extsb 28, 28
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stdcx. 28, 0, 12
+; CHECK-NEXT:    mfocrf 28, 128
+; CHECK-NEXT:    srwi 28, 28, 28
+; CHECK-NEXT:    rlwinm 28, 28, 31, 31, 31
+; CHECK-NEXT:    cmpwi 28, 1
 ; CHECK-NEXT:  .LBB3_26: # %cmpxchg.trystore161
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stdcx. 28, 0, 12
-; CHECK-NEXT:    beq+ 0, .LBB3_28
+; CHECK-NEXT:    bc 12, 2, .LBB3_28
 ; CHECK-NEXT:  # %bb.27: # %cmpxchg.releasedload160
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 0, 0, 12
-; CHECK-NEXT:    cmpld 0, 29
-; CHECK-NEXT:    beq+ 0, .LBB3_26
+; CHECK-NEXT:    cmpld 1, 0, 29
+; CHECK-NEXT:    beq+ 1, .LBB3_26
 ; CHECK-NEXT:  .LBB3_28: # %cmpxchg.nostore158
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 0, sll at toc@l(30)
@@ -4504,15 +4532,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.29: # %cmpxchg.fencedstore143
 ; CHECK-NEXT:    extsb 27, 27
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stdcx. 27, 0, 0
+; CHECK-NEXT:    mfocrf 27, 128
+; CHECK-NEXT:    srwi 27, 27, 28
+; CHECK-NEXT:    rlwinm 27, 27, 31, 31, 31
+; CHECK-NEXT:    cmpwi 27, 1
 ; CHECK-NEXT:  .LBB3_30: # %cmpxchg.trystore142
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stdcx. 27, 0, 0
-; CHECK-NEXT:    beq+ 0, .LBB3_32
+; CHECK-NEXT:    bc 12, 2, .LBB3_32
 ; CHECK-NEXT:  # %bb.31: # %cmpxchg.releasedload141
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 29, 0, 0
-; CHECK-NEXT:    cmpld 29, 28
-; CHECK-NEXT:    beq+ 0, .LBB3_30
+; CHECK-NEXT:    cmpld 1, 29, 28
+; CHECK-NEXT:    beq+ 1, .LBB3_30
 ; CHECK-NEXT:  .LBB3_32: # %cmpxchg.nostore139
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 29, ull at toc@l(30)
@@ -4523,15 +4555,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne- 0, .LBB3_36
 ; CHECK-NEXT:  # %bb.33: # %cmpxchg.fencedstore124
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stbcx. 29, 0, 6
+; CHECK-NEXT:    mfocrf 29, 128
+; CHECK-NEXT:    srwi 29, 29, 28
+; CHECK-NEXT:    rlwinm 29, 29, 31, 31, 31
+; CHECK-NEXT:    cmpwi 29, 1
 ; CHECK-NEXT:  .LBB3_34: # %cmpxchg.trystore123
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stbcx. 29, 0, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_37
+; CHECK-NEXT:    bc 12, 2, .LBB3_37
 ; CHECK-NEXT:  # %bb.35: # %cmpxchg.releasedload122
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lbarx 28, 0, 6
-; CHECK-NEXT:    cmplw 28, 30
-; CHECK-NEXT:    beq+ 0, .LBB3_34
+; CHECK-NEXT:    lbarx 29, 0, 6
+; CHECK-NEXT:    cmplw 1, 29, 30
+; CHECK-NEXT:    beq+ 1, .LBB3_34
 ; CHECK-NEXT:  .LBB3_36: # %cmpxchg.nostore120
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4551,15 +4587,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    bne- 0, .LBB3_42
 ; CHECK-NEXT:  # %bb.39: # %cmpxchg.fencedstore105
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stbcx. 30, 0, 7
+; CHECK-NEXT:    mfocrf 30, 128
+; CHECK-NEXT:    srwi 30, 30, 28
+; CHECK-NEXT:    rlwinm 30, 30, 31, 31, 31
+; CHECK-NEXT:    cmpwi 30, 1
 ; CHECK-NEXT:  .LBB3_40: # %cmpxchg.trystore104
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stbcx. 30, 0, 7
-; CHECK-NEXT:    beq+ 0, .LBB3_43
+; CHECK-NEXT:    bc 12, 2, .LBB3_43
 ; CHECK-NEXT:  # %bb.41: # %cmpxchg.releasedload103
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lbarx 29, 0, 7
-; CHECK-NEXT:    cmplw 29, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_40
+; CHECK-NEXT:    lbarx 30, 0, 7
+; CHECK-NEXT:    cmplw 1, 30, 6
+; CHECK-NEXT:    beq+ 1, .LBB3_40
 ; CHECK-NEXT:  .LBB3_42: # %cmpxchg.nostore101
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4581,15 +4621,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 7, 7, 16
+; CHECK-NEXT:    sthcx. 7, 0, 8
+; CHECK-NEXT:    mfocrf 7, 128
+; CHECK-NEXT:    srwi 7, 7, 28
+; CHECK-NEXT:    rlwinm 7, 7, 31, 31, 31
+; CHECK-NEXT:    cmpwi 7, 1
 ; CHECK-NEXT:  .LBB3_46: # %cmpxchg.trystore85
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 7, 0, 8
-; CHECK-NEXT:    beq+ 0, .LBB3_49
+; CHECK-NEXT:    bc 12, 2, .LBB3_49
 ; CHECK-NEXT:  # %bb.47: # %cmpxchg.releasedload84
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lharx 30, 0, 8
-; CHECK-NEXT:    cmplw 30, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_46
+; CHECK-NEXT:    lharx 7, 0, 8
+; CHECK-NEXT:    cmplw 1, 7, 6
+; CHECK-NEXT:    beq+ 1, .LBB3_46
 ; CHECK-NEXT:  .LBB3_48: # %cmpxchg.nostore82
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4611,15 +4655,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 7, 7, 16
+; CHECK-NEXT:    sthcx. 7, 0, 9
+; CHECK-NEXT:    mfocrf 7, 128
+; CHECK-NEXT:    srwi 7, 7, 28
+; CHECK-NEXT:    rlwinm 7, 7, 31, 31, 31
+; CHECK-NEXT:    cmpwi 7, 1
 ; CHECK-NEXT:  .LBB3_52: # %cmpxchg.trystore66
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 7, 0, 9
-; CHECK-NEXT:    beq+ 0, .LBB3_55
+; CHECK-NEXT:    bc 12, 2, .LBB3_55
 ; CHECK-NEXT:  # %bb.53: # %cmpxchg.releasedload65
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lharx 8, 0, 9
-; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_52
+; CHECK-NEXT:    lharx 7, 0, 9
+; CHECK-NEXT:    cmplw 1, 7, 6
+; CHECK-NEXT:    beq+ 1, .LBB3_52
 ; CHECK-NEXT:  .LBB3_54: # %cmpxchg.nostore63
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4640,15 +4688,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.57: # %cmpxchg.fencedstore48
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stwcx. 7, 0, 10
+; CHECK-NEXT:    mfocrf 7, 128
+; CHECK-NEXT:    srwi 7, 7, 28
+; CHECK-NEXT:    rlwinm 7, 7, 31, 31, 31
+; CHECK-NEXT:    cmpwi 7, 1
 ; CHECK-NEXT:  .LBB3_58: # %cmpxchg.trystore47
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stwcx. 7, 0, 10
-; CHECK-NEXT:    beq+ 0, .LBB3_61
+; CHECK-NEXT:    bc 12, 2, .LBB3_61
 ; CHECK-NEXT:  # %bb.59: # %cmpxchg.releasedload46
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lwarx 8, 0, 10
-; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_58
+; CHECK-NEXT:    lwarx 7, 0, 10
+; CHECK-NEXT:    cmplw 1, 7, 6
+; CHECK-NEXT:    beq+ 1, .LBB3_58
 ; CHECK-NEXT:  .LBB3_60: # %cmpxchg.nostore44
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4669,15 +4721,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.63: # %cmpxchg.fencedstore29
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stwcx. 7, 0, 11
+; CHECK-NEXT:    mfocrf 7, 128
+; CHECK-NEXT:    srwi 7, 7, 28
+; CHECK-NEXT:    rlwinm 7, 7, 31, 31, 31
+; CHECK-NEXT:    cmpwi 7, 1
 ; CHECK-NEXT:  .LBB3_64: # %cmpxchg.trystore28
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stwcx. 7, 0, 11
-; CHECK-NEXT:    beq+ 0, .LBB3_67
+; CHECK-NEXT:    bc 12, 2, .LBB3_67
 ; CHECK-NEXT:  # %bb.65: # %cmpxchg.releasedload27
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    lwarx 8, 0, 11
-; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_64
+; CHECK-NEXT:    lwarx 7, 0, 11
+; CHECK-NEXT:    cmplw 1, 7, 6
+; CHECK-NEXT:    beq+ 1, .LBB3_64
 ; CHECK-NEXT:  .LBB3_66: # %cmpxchg.nostore25
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4698,15 +4754,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.69: # %cmpxchg.fencedstore10
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stdcx. 7, 0, 12
+; CHECK-NEXT:    mfocrf 7, 128
+; CHECK-NEXT:    srwi 7, 7, 28
+; CHECK-NEXT:    rlwinm 7, 7, 31, 31, 31
+; CHECK-NEXT:    cmpwi 7, 1
 ; CHECK-NEXT:  .LBB3_70: # %cmpxchg.trystore9
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stdcx. 7, 0, 12
-; CHECK-NEXT:    beq+ 0, .LBB3_73
+; CHECK-NEXT:    bc 12, 2, .LBB3_73
 ; CHECK-NEXT:  # %bb.71: # %cmpxchg.releasedload8
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    ldarx 8, 0, 12
-; CHECK-NEXT:    cmpld 8, 6
-; CHECK-NEXT:    beq+ 0, .LBB3_70
+; CHECK-NEXT:    ldarx 7, 0, 12
+; CHECK-NEXT:    cmpld 1, 7, 6
+; CHECK-NEXT:    beq+ 1, .LBB3_70
 ; CHECK-NEXT:  .LBB3_72: # %cmpxchg.nostore6
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4727,15 +4787,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:  # %bb.75: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    extsb 4, 4
 ; CHECK-NEXT:    sync
+; CHECK-NEXT:    stdcx. 4, 0, 0
+; CHECK-NEXT:    mfocrf 4, 128
+; CHECK-NEXT:    srwi 4, 4, 28
+; CHECK-NEXT:    rlwinm 4, 4, 31, 31, 31
+; CHECK-NEXT:    cmpwi 4, 1
 ; CHECK-NEXT:  .LBB3_76: # %cmpxchg.trystore
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stdcx. 4, 0, 0
-; CHECK-NEXT:    beq+ 0, .LBB3_79
+; CHECK-NEXT:    bc 12, 2, .LBB3_79
 ; CHECK-NEXT:  # %bb.77: # %cmpxchg.releasedload
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    ldarx 6, 0, 0
-; CHECK-NEXT:    cmpld 6, 3
-; CHECK-NEXT:    beq+ 0, .LBB3_76
+; CHECK-NEXT:    ldarx 4, 0, 0
+; CHECK-NEXT:    cmpld 1, 4, 3
+; CHECK-NEXT:    beq+ 1, .LBB3_76
 ; CHECK-NEXT:  .LBB3_78: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4928,15 +4992,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:  # %bb.17: # %cmpxchg.fencedstore171
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
+; AIX32-NEXT:    stwcx. 5, 0, 15
+; AIX32-NEXT:    mfocrf 5, 128
+; AIX32-NEXT:    srwi 5, 5, 28
+; AIX32-NEXT:    rlwinm 5, 5, 31, 31, 31
+; AIX32-NEXT:    cmpwi 5, 1
 ; AIX32-NEXT:  L..BB3_18: # %cmpxchg.trystore170
 ; AIX32-NEXT:    #
-; AIX32-NEXT:    stwcx. 5, 0, 15
-; AIX32-NEXT:    beq+ 0, L..BB3_20
+; AIX32-NEXT:    bc 12, 2, L..BB3_20
 ; AIX32-NEXT:  # %bb.19: # %cmpxchg.releasedload169
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 15
-; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    beq+ 0, L..BB3_18
+; AIX32-NEXT:    cmplw 1, 3, 4
+; AIX32-NEXT:    beq+ 1, L..BB3_18
 ; AIX32-NEXT:  L..BB3_20: # %cmpxchg.nostore167
 ; AIX32-NEXT:    lwsync
 ; AIX32-NEXT:    lwz 28, L..C5(2) # @ui
@@ -4949,15 +5017,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:  # %bb.21: # %cmpxchg.fencedstore152
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
+; AIX32-NEXT:    stwcx. 5, 0, 28
+; AIX32-NEXT:    mfocrf 5, 128
+; AIX32-NEXT:    srwi 5, 5, 28
+; AIX32-NEXT:    rlwinm 5, 5, 31, 31, 31
+; AIX32-NEXT:    cmpwi 5, 1
 ; AIX32-NEXT:  L..BB3_22: # %cmpxchg.trystore151
 ; AIX32-NEXT:    #
-; AIX32-NEXT:    stwcx. 5, 0, 28
-; AIX32-NEXT:    beq+ 0, L..BB3_24
+; AIX32-NEXT:    bc 12, 2, L..BB3_24
 ; AIX32-NEXT:  # %bb.23: # %cmpxchg.releasedload150
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 28
-; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    beq+ 0, L..BB3_22
+; AIX32-NEXT:    cmplw 1, 3, 4
+; AIX32-NEXT:    beq+ 1, L..BB3_22
 ; AIX32-NEXT:  L..BB3_24: # %cmpxchg.nostore148
 ; AIX32-NEXT:    lwsync
 ; AIX32-NEXT:    stw 3, 0(28)
@@ -5148,15 +5220,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:  # %bb.49: # %cmpxchg.fencedstore10
 ; AIX32-NEXT:    extsb 4, 4
 ; AIX32-NEXT:    sync
+; AIX32-NEXT:    stwcx. 4, 0, 15
+; AIX32-NEXT:    mfocrf 4, 128
+; AIX32-NEXT:    srwi 4, 4, 28
+; AIX32-NEXT:    rlwinm 4, 4, 31, 31, 31
+; AIX32-NEXT:    cmpwi 4, 1
 ; AIX32-NEXT:  L..BB3_50: # %cmpxchg.trystore9
 ; AIX32-NEXT:    #
-; AIX32-NEXT:    stwcx. 4, 0, 15
-; AIX32-NEXT:    beq+ 0, L..BB3_53
+; AIX32-NEXT:    bc 12, 2, L..BB3_53
 ; AIX32-NEXT:  # %bb.51: # %cmpxchg.releasedload8
 ; AIX32-NEXT:    #
-; AIX32-NEXT:    lwarx 5, 0, 15
-; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    beq+ 0, L..BB3_50
+; AIX32-NEXT:    lwarx 4, 0, 15
+; AIX32-NEXT:    cmplw 1, 4, 3
+; AIX32-NEXT:    beq+ 1, L..BB3_50
 ; AIX32-NEXT:  L..BB3_52: # %cmpxchg.nostore6
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5177,15 +5253,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:  # %bb.55: # %cmpxchg.fencedstore
 ; AIX32-NEXT:    extsb 4, 4
 ; AIX32-NEXT:    sync
+; AIX32-NEXT:    stwcx. 4, 0, 28
+; AIX32-NEXT:    mfocrf 4, 128
+; AIX32-NEXT:    srwi 4, 4, 28
+; AIX32-NEXT:    rlwinm 4, 4, 31, 31, 31
+; AIX32-NEXT:    cmpwi 4, 1
 ; AIX32-NEXT:  L..BB3_56: # %cmpxchg.trystore
 ; AIX32-NEXT:    #
-; AIX32-NEXT:    stwcx. 4, 0, 28
-; AIX32-NEXT:    beq+ 0, L..BB3_59
+; AIX32-NEXT:    bc 12, 2, L..BB3_59
 ; AIX32-NEXT:  # %bb.57: # %cmpxchg.releasedload
 ; AIX32-NEXT:    #
-; AIX32-NEXT:    lwarx 5, 0, 28
-; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    beq+ 0, L..BB3_56
+; AIX32-NEXT:    lwarx 4, 0, 28
+; AIX32-NEXT:    cmplw 1, 4, 3
+; AIX32-NEXT:    beq+ 1, L..BB3_56
 ; AIX32-NEXT:  L..BB3_58: # %cmpxchg.nostore
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
index cfc3a99ab9fe9..1a4444bc8b170 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -467,18 +467,22 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB43_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB43_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB43_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
   ret void
@@ -492,18 +496,22 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB44_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB44_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB44_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB44_2
 ; PPC64LE-NEXT:  .LBB44_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -519,17 +527,21 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB45_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB45_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB45_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB45_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB45_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB45_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -546,17 +558,21 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB46_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB46_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB46_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB46_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB46_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB46_2
 ; PPC64LE-NEXT:  .LBB46_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -572,17 +588,21 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB47_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB47_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB47_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB47_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB47_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB47_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -599,17 +619,21 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB48_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB48_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB48_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB48_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB48_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB48_2
 ; PPC64LE-NEXT:  .LBB48_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -625,17 +649,21 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB49_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB49_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB49_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB49_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB49_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB49_2
 ; PPC64LE-NEXT:  .LBB49_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -713,18 +741,22 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB53_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB53_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB53_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
   ret void
@@ -738,18 +770,22 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB54_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB54_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB54_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB54_2
 ; PPC64LE-NEXT:  .LBB54_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -765,17 +801,21 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB55_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB55_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB55_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB55_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB55_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB55_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -792,17 +832,21 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB56_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB56_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB56_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB56_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB56_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB56_2
 ; PPC64LE-NEXT:  .LBB56_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -818,17 +862,21 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB57_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB57_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB57_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB57_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB57_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB57_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -845,17 +893,21 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB58_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB58_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB58_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB58_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB58_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB58_2
 ; PPC64LE-NEXT:  .LBB58_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -871,17 +923,21 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB59_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB59_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB59_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB59_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB59_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB59_2
 ; PPC64LE-NEXT:  .LBB59_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -953,16 +1009,20 @@ define void @test63(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB63_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB63_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB63_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
   ret void
@@ -976,16 +1036,20 @@ define void @test64(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB64_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB64_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB64_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB64_2
 ; PPC64LE-NEXT:  .LBB64_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1001,15 +1065,19 @@ define void @test65(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB65_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB65_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB65_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB65_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB65_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB65_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1026,15 +1094,19 @@ define void @test66(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB66_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB66_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB66_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB66_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB66_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB66_2
 ; PPC64LE-NEXT:  .LBB66_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1050,15 +1122,19 @@ define void @test67(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB67_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB67_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB67_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB67_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB67_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB67_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1075,15 +1151,19 @@ define void @test68(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB68_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB68_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB68_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB68_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB68_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB68_2
 ; PPC64LE-NEXT:  .LBB68_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1099,15 +1179,19 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB69_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB69_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB69_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB69_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB69_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB69_2
 ; PPC64LE-NEXT:  .LBB69_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1179,16 +1263,20 @@ define void @test73(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB73_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB73_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB73_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
   ret void
@@ -1202,16 +1290,20 @@ define void @test74(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB74_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB74_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB74_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB74_2
 ; PPC64LE-NEXT:  .LBB74_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1227,15 +1319,19 @@ define void @test75(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB75_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB75_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB75_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB75_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB75_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB75_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1252,15 +1348,19 @@ define void @test76(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB76_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB76_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB76_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB76_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB76_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB76_2
 ; PPC64LE-NEXT:  .LBB76_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1276,15 +1376,19 @@ define void @test77(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB77_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB77_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB77_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB77_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB77_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB77_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1301,15 +1405,19 @@ define void @test78(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB78_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB78_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB78_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB78_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB78_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB78_2
 ; PPC64LE-NEXT:  .LBB78_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1325,15 +1433,19 @@ define void @test79(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB79_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB79_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB79_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB79_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB79_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB79_2
 ; PPC64LE-NEXT:  .LBB79_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1411,18 +1523,22 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB83_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB83_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB83_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
   ret void
@@ -1436,18 +1552,22 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB84_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB84_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB84_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB84_2
 ; PPC64LE-NEXT:  .LBB84_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1463,17 +1583,21 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB85_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB85_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB85_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB85_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB85_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB85_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1490,17 +1614,21 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB86_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB86_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB86_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB86_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB86_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB86_2
 ; PPC64LE-NEXT:  .LBB86_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1516,17 +1644,21 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB87_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB87_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB87_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB87_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB87_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB87_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1543,17 +1675,21 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB88_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB88_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB88_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB88_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB88_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB88_2
 ; PPC64LE-NEXT:  .LBB88_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1569,17 +1705,21 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB89_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stbcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB89_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB89_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB89_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lbarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB89_2
+; PPC64LE-NEXT:    lbarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB89_2
 ; PPC64LE-NEXT:  .LBB89_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1657,18 +1797,22 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB93_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB93_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB93_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
   ret void
@@ -1682,18 +1826,22 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB94_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB94_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB94_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB94_2
 ; PPC64LE-NEXT:  .LBB94_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1709,17 +1857,21 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB95_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB95_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB95_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB95_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB95_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB95_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1736,17 +1888,21 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB96_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB96_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB96_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB96_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB96_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB96_2
 ; PPC64LE-NEXT:  .LBB96_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1762,17 +1918,21 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB97_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB97_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB97_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB97_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB97_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB97_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1789,17 +1949,21 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB98_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB98_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB98_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB98_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB98_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB98_2
 ; PPC64LE-NEXT:  .LBB98_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1815,17 +1979,21 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    cmplw 6, 4
 ; PPC64LE-NEXT:    bne- 0, .LBB99_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
-; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
+; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    sthcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB99_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB99_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB99_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lharx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB99_2
+; PPC64LE-NEXT:    lharx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB99_2
 ; PPC64LE-NEXT:  .LBB99_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1897,16 +2065,20 @@ define void @test103(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB103_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB103_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB103_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
   ret void
@@ -1920,16 +2092,20 @@ define void @test104(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB104_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB104_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB104_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB104_2
 ; PPC64LE-NEXT:  .LBB104_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1945,15 +2121,19 @@ define void @test105(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB105_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB105_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB105_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB105_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB105_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB105_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -1970,15 +2150,19 @@ define void @test106(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB106_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB106_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB106_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB106_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB106_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB106_2
 ; PPC64LE-NEXT:  .LBB106_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1994,15 +2178,19 @@ define void @test107(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB107_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB107_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB107_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB107_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB107_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB107_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -2019,15 +2207,19 @@ define void @test108(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB108_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB108_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB108_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB108_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB108_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB108_2
 ; PPC64LE-NEXT:  .LBB108_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2043,15 +2235,19 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB109_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stwcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB109_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB109_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB109_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    lwarx 6, 0, 3
-; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB109_2
+; PPC64LE-NEXT:    lwarx 5, 0, 3
+; PPC64LE-NEXT:    cmplw 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB109_2
 ; PPC64LE-NEXT:  .LBB109_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2123,16 +2319,20 @@ define void @test113(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB113_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB113_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB113_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
   ret void
@@ -2146,16 +2346,20 @@ define void @test114(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB114_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
+; PPC64LE-NEXT:    .p2align 4
 ; PPC64LE-NEXT:  .LBB114_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr+ 0
+; PPC64LE-NEXT:    bclr 12, 2, 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB114_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB114_2
 ; PPC64LE-NEXT:  .LBB114_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2171,15 +2375,19 @@ define void @test115(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB115_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB115_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB115_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB115_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB115_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB115_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -2196,15 +2404,19 @@ define void @test116(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB116_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB116_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB116_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB116_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB116_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB116_2
 ; PPC64LE-NEXT:  .LBB116_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2220,15 +2432,19 @@ define void @test117(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB117_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB117_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB117_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB117_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB117_2
 ; PPC64LE-NEXT:    blr
 ; PPC64LE-NEXT:  .LBB117_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
@@ -2245,15 +2461,19 @@ define void @test118(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB118_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB118_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB118_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB118_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB118_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB118_2
 ; PPC64LE-NEXT:  .LBB118_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2269,15 +2489,19 @@ define void @test119(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-NEXT:    bne- 0, .LBB119_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
+; PPC64LE-NEXT:    stdcx. 5, 0, 3
+; PPC64LE-NEXT:    mfocrf 5, 128
+; PPC64LE-NEXT:    srwi 5, 5, 28
+; PPC64LE-NEXT:    rlwinm 5, 5, 31, 31, 31
+; PPC64LE-NEXT:    cmpwi 5, 1
 ; PPC64LE-NEXT:  .LBB119_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq+ 0, .LBB119_4
+; PPC64LE-NEXT:    bc 12, 2, .LBB119_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
-; PPC64LE-NEXT:    ldarx 6, 0, 3
-; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq+ 0, .LBB119_2
+; PPC64LE-NEXT:    ldarx 5, 0, 3
+; PPC64LE-NEXT:    cmpld 1, 5, 4
+; PPC64LE-NEXT:    beq+ 1, .LBB119_2
 ; PPC64LE-NEXT:  .LBB119_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 54a35dab2a422..782b71192c41e 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -268,15 +268,19 @@ define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    li r5, 1
 ; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    stwcx. r5, 0, r4
+; CHECK-NEXT:    mfcr r5 # cr0
+; CHECK-NEXT:    srwi r5, r5, 28
+; CHECK-NEXT:    rlwinm r5, r5, 31, 31, 31
+; CHECK-NEXT:    cmpwi r5, 1
 ; CHECK-NEXT:  .LBB10_2: # %cmpxchg.trystore
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    stwcx. r5, 0, r4
-; CHECK-NEXT:    beq+ cr0, .LBB10_4
+; CHECK-NEXT:    bc 12, eq, .LBB10_4
 ; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx r3, 0, r4
-; CHECK-NEXT:    cmplwi r3, 0
-; CHECK-NEXT:    beq+ cr0, .LBB10_2
+; CHECK-NEXT:    cmplwi cr1, r3, 0
+; CHECK-NEXT:    beq+ cr1, .LBB10_2
 ; CHECK-NEXT:  .LBB10_4: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
diff --git a/llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll b/llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll
index d3ff60ef6ba1d..116a127da0a20 100644
--- a/llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll
+++ b/llvm/test/Transforms/LICM/hoist-inaccesiblemem-call.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<target-ir>,loop-mssa(licm)' < %s -S | FileCheck %s
 
 
-define dso_local i32 @loop_alias(i32 %x, ptr %a, ptr %b)  #0{
+define dso_local i32 @loop_alias(i32 %x, ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: define dso_local i32 @loop_alias(
 ; CHECK-SAME: i32 [[X:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
diff --git a/llvm/test/Transforms/LICM/pr50367.ll b/llvm/test/Transforms/LICM/pr50367.ll
index 6aafff74f61d8..3e3e5c92cd481 100644
--- a/llvm/test/Transforms/LICM/pr50367.ll
+++ b/llvm/test/Transforms/LICM/pr50367.ll
@@ -44,6 +44,9 @@ define void @store_null(i1 %arg) {
 ; CHECK-LABEL: define void @store_null(
 ; CHECK-SAME: i1 [[ARG:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    store ptr null, ptr @e, align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    [[PTR:%.*]] = load ptr, ptr @e, align 8, !tbaa [[ANYPTR_TBAA0]]
+; CHECK-NEXT:    store i32 0, ptr [[PTR]], align 4, !tbaa [[INT_TBAA4]]
 ; CHECK-NEXT:    br label %[[LOOP1:.*]]
 ; CHECK:       [[LOOP1]]:
 ; CHECK-NEXT:    br label %[[LOOP2:.*]]
@@ -53,7 +56,6 @@ define void @store_null(i1 %arg) {
 ; CHECK-NEXT:    store i32 0, ptr null, align 4
 ; CHECK-NEXT:    br label %[[LOOP2]]
 ; CHECK:       [[LOOP_LATCH]]:
-; CHECK-NEXT:    store i32 0, ptr null, align 4, !tbaa [[INT_TBAA4]]
 ; CHECK-NEXT:    br label %[[LOOP1]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LICM/store-hoisting.ll b/llvm/test/Transforms/LICM/store-hoisting.ll
index d4f62d1c16f06..09d3aad256e04 100644
--- a/llvm/test/Transforms/LICM/store-hoisting.ll
+++ b/llvm/test/Transforms/LICM/store-hoisting.ll
@@ -32,7 +32,7 @@ loop:
   store i32 0, ptr %loc
   %iv.next = add i32 %iv, 1
   br i1 %earlycnd, label %exit1, label %backedge
-  
+
 backedge:
   %cmp = icmp slt i32 %iv, 200
   br i1 %cmp, label %loop, label %exit2
@@ -122,10 +122,9 @@ exit:
 ; Hoisting the store is actually valid here, as it dominates the load.
 define void @neg_ref(ptr %loc) {
 ; CHECK-LABEL: @neg_ref
-; CHECK-LABEL: exit1:
-; CHECK: store i32 0, ptr %loc
-; CHECK-LABEL: exit2:
+; CHECK-LABEL: entry:
 ; CHECK: store i32 0, ptr %loc
+; CHECK-LABEL: loop:
 entry:
   br label %loop
 
@@ -135,7 +134,7 @@ loop:
   %v = load i32, ptr %loc
   %earlycnd = icmp eq i32 %v, 198
   br i1 %earlycnd, label %exit1, label %backedge
-  
+
 backedge:
   %iv.next = add i32 %iv, 1
   %cmp = icmp slt i32 %iv, 200
@@ -164,7 +163,7 @@ loop:
   store i32 0, ptr %loc
   %earlycnd = icmp eq i32 %v, 198
   br i1 %earlycnd, label %exit1, label %backedge
-  
+
 backedge:
   %iv.next = add i32 %iv, 1
   %cmp = icmp slt i32 %iv, 200
@@ -388,7 +387,7 @@ declare void @readonly() readonly
 ; TODO: can legally hoist since value read by call is known
 define void @test_dominated_readonly(ptr %loc) {
 ; CHECK-LABEL: @test_dominated_readonly
-; CHECK-LABEL: loop:
+; CHECK-LABEL: entry:
 ; CHECK: store i32 0, ptr %loc
 ; CHECK-LABEL: exit:
 entry:



More information about the llvm-commits mailing list