[llvm] 25a4b19 - Handle part-word LL/SC in atomic expansion pass

Krzysztof Parzyszek via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 28 08:08:14 PDT 2020


Author: Krzysztof Parzyszek
Date: 2020-04-28T10:07:39-05:00
New Revision: 25a4b1904c10142ea22e84cddf6db2ca059a9631

URL: https://github.com/llvm/llvm-project/commit/25a4b1904c10142ea22e84cddf6db2ca059a9631
DIFF: https://github.com/llvm/llvm-project/commit/25a4b1904c10142ea22e84cddf6db2ca059a9631.diff

LOG: Handle part-word LL/SC in atomic expansion pass

Differential Revision: https://reviews.llvm.org/D77213
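
On an LL/SC target whose narrowest native cmpxchg covers a full word
(TLI->getMinCmpXchgSizeInBits()), a sub-word atomicrmw or cmpxchg previously
could not be expanded at all (the pass hit an llvm_unreachable or an assert).
It is now widened to the containing aligned word, expanded into an LL/SC loop
on that word, and the original-width result is extracted afterwards. A minimal
sketch of the kind of input IR this covers (the function name @partword_add is
invented; the operation mirrors the new Hexagon tests):

  define i8 @partword_add(i8* %p) {
    ; 8-bit RMW on a target whose smallest LL/SC access is a 32-bit word
    %old = atomicrmw add i8* %p, i8 2 monotonic
    ret i8 %old
  }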

Added: 
    llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll
    llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll

Modified: 
    llvm/lib/CodeGen/AtomicExpandPass.cpp
    llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
    llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
    llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
    llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
    llvm/test/Transforms/AtomicExpand/SPARC/partword.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 1def0d2bd85a..9bec110604d9 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -570,8 +570,8 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
     unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     unsigned ValueSize = getAtomicOpSize(AI);
     if (ValueSize < MinCASSize) {
-      llvm_unreachable(
-          "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
+      expandPartwordAtomicRMW(AI,
+                              TargetLoweringBase::AtomicExpansionKind::LLSC);
     } else {
       auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
         return performAtomicOp(AI->getOperation(), Builder, Loaded,
@@ -608,16 +608,43 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
 
 namespace {
 
-/// Result values from createMaskInstrs helper.
 struct PartwordMaskValues {
-  Type *WordType;
-  Type *ValueType;
-  Value *AlignedAddr;
-  Value *ShiftAmt;
-  Value *Mask;
-  Value *Inv_Mask;
+  // These three fields are guaranteed to be set by createMaskInstrs.
+  Type *WordType = nullptr;
+  Type *ValueType = nullptr;
+  Value *AlignedAddr = nullptr;
+  // The remaining fields can be null.
+  Value *ShiftAmt = nullptr;
+  Value *Mask = nullptr;
+  Value *Inv_Mask = nullptr;
 };
 
+LLVM_ATTRIBUTE_UNUSED
+raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
+  auto PrintObj = [&O](auto *V) {
+    if (V)
+      O << *V;
+    else
+      O << "nullptr";
+    O << '\n';
+  };
+  O << "PartwordMaskValues {\n";
+  O << "  WordType: ";
+  PrintObj(PMV.WordType);
+  O << "  ValueType: ";
+  PrintObj(PMV.ValueType);
+  O << "  AlignedAddr: ";
+  PrintObj(PMV.AlignedAddr);
+  O << "  ShiftAmt: ";
+  PrintObj(PMV.ShiftAmt);
+  O << "  Mask: ";
+  PrintObj(PMV.Mask);
+  O << "  Inv_Mask: ";
+  PrintObj(PMV.Inv_Mask);
+  O << "}\n";
+  return O;
+}
+
 } // end anonymous namespace
 
 /// This is a helper function which builds instructions to provide
@@ -638,48 +665,74 @@ struct PartwordMaskValues {
 /// Inv_Mask: The inverse of Mask.
 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
                                            Type *ValueType, Value *Addr,
-                                           unsigned WordSize) {
-  PartwordMaskValues Ret;
+                                           unsigned MinWordSize) {
+  PartwordMaskValues PMV;
 
-  BasicBlock *BB = I->getParent();
-  Function *F = BB->getParent();
   Module *M = I->getModule();
-
-  LLVMContext &Ctx = F->getContext();
+  LLVMContext &Ctx = M->getContext();
   const DataLayout &DL = M->getDataLayout();
-
   unsigned ValueSize = DL.getTypeStoreSize(ValueType);
 
-  assert(ValueSize < WordSize);
+  PMV.ValueType = ValueType;
+  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
+                                         : ValueType;
+  if (PMV.ValueType == PMV.WordType) {
+    PMV.AlignedAddr = Addr;
+    return PMV;
+  }
 
-  Ret.ValueType = ValueType;
-  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
+  assert(ValueSize < MinWordSize);
 
   Type *WordPtrType =
-      Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
+      PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
 
   Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
-  Ret.AlignedAddr = Builder.CreateIntToPtr(
-      Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
+  PMV.AlignedAddr = Builder.CreateIntToPtr(
+      Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
       "AlignedAddr");
 
-  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
+  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
   if (DL.isLittleEndian()) {
     // turn bytes into bits
-    Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
+    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
   } else {
     // turn bytes into bits, and count from the other side.
-    Ret.ShiftAmt =
-        Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
+    PMV.ShiftAmt = Builder.CreateShl(
+        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
   }
 
-  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
-  Ret.Mask = Builder.CreateShl(
-      ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt,
+  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
+  PMV.Mask = Builder.CreateShl(
+      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
       "Mask");
-  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
+  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
+  return PMV;
+}
+
+static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+                                 const PartwordMaskValues &PMV) {
+  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+  if (PMV.WordType == PMV.ValueType)
+    return WideWord;
 
-  return Ret;
+  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
+  Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
+  return Trunc;
+}
+
+static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
+                                Value *Updated, const PartwordMaskValues &PMV) {
+  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
+  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
+  if (PMV.WordType == PMV.ValueType)
+    return Updated;
+
+  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
+  Value *Shift =
+      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
+  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
+  Value *Or = Builder.CreateOr(And, Shift, "inserted");
+  return Or;
 }
 
 /// Emit IR to implement a masked version of a given atomicrmw
@@ -719,13 +772,9 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
     // Finally, comparison ops will operate on the full value, so
     // truncate down to the original size, and expand out again after
     // doing the operation.
-    Value *Loaded_Shiftdown = Builder.CreateTrunc(
-        Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
-    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
-    Value *NewVal_Shiftup = Builder.CreateShl(
-        Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
-    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
-    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
+    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
+    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
+    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
     return FinalVal;
   }
   default:
@@ -738,12 +787,10 @@ static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
 ///
 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
 /// way as a typical atomicrmw expansion. The only difference here is
-/// that the operation inside of the loop must operate only upon a
+/// that the operation inside of the loop may operate upon only a
 /// part of the value.
 void AtomicExpand::expandPartwordAtomicRMW(
     AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
-  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
-
   AtomicOrdering MemOpOrder = AI->getOrdering();
 
   IRBuilder<> Builder(AI);
@@ -761,13 +808,18 @@ void AtomicExpand::expandPartwordAtomicRMW(
                                  ValOperand_Shifted, AI->getValOperand(), PMV);
   };
 
-  // TODO: When we're ready to support LLSC conversions too, use
-  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
-  Value *OldResult =
-      insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
-                           PerformPartwordOp, createCmpXchgInstFun);
-  Value *FinalOldResult = Builder.CreateTrunc(
-      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+  Value *OldResult;
+  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
+    OldResult =
+        insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
+                             PerformPartwordOp, createCmpXchgInstFun);
+  } else {
+    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
+    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
+                                  MemOpOrder, PerformPartwordOp);
+  }
+
+  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
   AI->replaceAllUsesWith(FinalOldResult);
   AI->eraseFromParent();
 }
@@ -800,8 +852,7 @@ AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
   AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
                                                  NewOperand, AI->getOrdering());
 
-  Value *FinalOldResult = Builder.CreateTrunc(
-      Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
+  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
   AI->replaceAllUsesWith(FinalOldResult);
   AI->eraseFromParent();
   return NewAI;
@@ -923,8 +974,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
   // partword.cmpxchg.end:
   Builder.SetInsertPoint(CI);
 
-  Value *FinalOldVal = Builder.CreateTrunc(
-      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
+  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
   Value *Res = UndefValue::get(CI->getType());
   Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
   Res = Builder.CreateInsertValue(Res, Success, 1);
@@ -965,8 +1015,7 @@ void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
   Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
       Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
       AI->getOrdering());
-  Value *FinalOldResult = Builder.CreateTrunc(
-      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
+  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
   AI->replaceAllUsesWith(FinalOldResult);
   AI->eraseFromParent();
 }
@@ -987,9 +1036,7 @@ void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
   Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
       Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
       CI->getSuccessOrdering());
-  Value *FinalOldVal = Builder.CreateTrunc(
-      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
-
+  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
   Value *Res = UndefValue::get(CI->getType());
   Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
   Value *Success = Builder.CreateICmpEQ(
@@ -1126,24 +1173,28 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   //
   // The full expansion we produce is:
   //     [...]
+  // %aligned.addr = ...
   // cmpxchg.start:
-  //     %unreleasedload = @load.linked(%addr)
-  //     %should_store = icmp eq %unreleasedload, %desired
-  //     br i1 %should_store, label %cmpxchg.fencedstore,
+  //     %unreleasedload = @load.linked(%aligned.addr)
+  //     %unreleasedload.extract = extract value from %unreleasedload
+  //     %should_store = icmp eq %unreleasedload.extract, %desired
+  //     br i1 %should_store, label %cmpxchg.releasingstore,
   //                          label %cmpxchg.nostore
   // cmpxchg.releasingstore:
   //     fence?
   //     br label cmpxchg.trystore
   // cmpxchg.trystore:
-  //     %loaded.trystore = phi [%unreleasedload, %releasingstore],
+  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
   //                            [%releasedload, %cmpxchg.releasedload]
-  //     %stored = @store_conditional(%new, %addr)
+  //     %updated.new = insert %new into %loaded.trystore
+  //     %stored = @store_conditional(%updated.new, %aligned.addr)
   //     %success = icmp eq i32 %stored, 0
   //     br i1 %success, label %cmpxchg.success,
   //                     label %cmpxchg.releasedload/%cmpxchg.failure
   // cmpxchg.releasedload:
-  //     %releasedload = @load.linked(%addr)
-  //     %should_store = icmp eq %releasedload, %desired
+  //     %releasedload = @load.linked(%aligned.addr)
+  //     %releasedload.extract = extract value from %releasedload
+  //     %should_store = icmp eq %releasedload.extract, %desired
   //     br i1 %should_store, label %cmpxchg.trystore,
   //                          label %cmpxchg.failure
   // cmpxchg.success:
@@ -1159,9 +1210,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   //     fence?
   //     br label %cmpxchg.end
   // cmpxchg.end:
-  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
-  //                   [%loaded.trystore, %cmpxchg.trystore]
+  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
+  //                        [%loaded.trystore, %cmpxchg.trystore]
   //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+  //     %loaded = extract value from %loaded.exit
   //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
   //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
   //     [...]
@@ -1187,13 +1239,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.SetInsertPoint(BB);
   if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
     TLI->emitLeadingFence(Builder, CI, SuccessOrder);
+
+  PartwordMaskValues PMV =
+      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
+                       TLI->getMinCmpXchgSizeInBits() / 8);
   Builder.CreateBr(StartBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(StartBB);
-  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *UnreleasedLoad =
+      TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+  Value *UnreleasedLoadExtract =
+      extractMaskedValue(Builder, UnreleasedLoad, PMV);
   Value *ShouldStore = Builder.CreateICmpEQ(
-      UnreleasedLoad, CI->getCompareOperand(), "should_store");
+      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
@@ -1205,8 +1264,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.CreateBr(TryStoreBB);
 
   Builder.SetInsertPoint(TryStoreBB);
-  Value *StoreSuccess = TLI->emitStoreConditional(
-      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+  PHINode *LoadedTryStore =
+      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
+  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+  Value *NewValueInsert =
+      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
+  Value *StoreSuccess =
+      TLI->emitStoreConditional(Builder, NewValueInsert, Addr, MemOpOrder);
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
@@ -1216,13 +1280,16 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
   if (HasReleasedLoadBB) {
-    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
-    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
-                                       "should_store");
+    SecondLoad = TLI->emitLoadLinked(Builder, PMV.AlignedAddr, MemOpOrder);
+    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
+    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
+                                       CI->getCompareOperand(), "should_store");
 
     // If the cmpxchg doesn't actually need any ordering when it fails, we can
     // jump straight past that fence instruction (if it exists).
     Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+    // Update PHI node in TryStoreBB.
+    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
     Builder.CreateUnreachable();
 
@@ -1234,6 +1301,12 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.CreateBr(ExitBB);
 
   Builder.SetInsertPoint(NoStoreBB);
+  PHINode *LoadedNoStore =
+      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
+  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
+  if (HasReleasedLoadBB)
+    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
+
   // In the failing case, where we don't execute the store-conditional, the
   // target might want to balance out the load-linked with a dedicated
   // instruction (e.g., on ARM, clearing the exclusive monitor).
@@ -1241,6 +1314,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   Builder.CreateBr(FailureBB);
 
   Builder.SetInsertPoint(FailureBB);
+  PHINode *LoadedFailure =
+      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
+  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
+  if (CI->isWeak())
+    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
   if (ShouldInsertFencesForAtomic)
     TLI->emitTrailingFence(Builder, CI, FailureOrder);
   Builder.CreateBr(ExitBB);
@@ -1250,32 +1328,20 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
   // PHI.
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+  PHINode *LoadedExit =
+      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
+  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
+  LoadedExit->addIncoming(LoadedFailure, FailureBB);
+  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
   Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
   Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
 
-  // Setup the builder so we can create any PHIs we need.
-  Value *Loaded;
-  if (!HasReleasedLoadBB)
-    Loaded = UnreleasedLoad;
-  else {
-    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
-    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
-    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
-    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
-    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
-    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
-
-    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
-    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
-    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
-    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
-
-    Loaded = ExitLoaded;
-  }
+  // This is the "exit value" from the cmpxchg expansion. It may be of
+  // a type wider than the one in the cmpxchg instruction.
+  Value *LoadedFull = LoadedExit;
+
+  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
+  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
 
   // Look for any users of the cmpxchg that are just comparing the loaded value
   // against the desired one, and replace them with the CFG-derived version.
@@ -1417,8 +1483,6 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
       expandPartwordCmpXchg(CI);
     return false;
   case TargetLoweringBase::AtomicExpansionKind::LLSC: {
-    assert(ValueSize >= MinCASSize &&
-           "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
     return expandAtomicCmpXchg(CI);
   }
   case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
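
For reference, the shift/mask sequence that the new extractMaskedValue and
insertMaskedValue helpers emit whenever a sub-word value has to be pulled out
of, or spliced back into, the containing word looks roughly like this (an IR
fragment, assuming an i8 value inside an i32 word; %wide and %new are
placeholder names, %ShiftAmt and %Inv_Mask come from createMaskInstrs, and the
remaining value names match the ones the helpers assign):

  ; extractMaskedValue: recover the i8 from the loaded wide word
  %shifted = lshr i32 %wide, %ShiftAmt
  %extracted = trunc i32 %shifted to i8
  ; insertMaskedValue: splice the updated i8 back into the wide word
  %extended = zext i8 %new to i32
  %shifted1 = shl nuw i32 %extended, %ShiftAmt
  %unmasked = and i32 %wide, %Inv_Mask
  %inserted = or i32 %unmasked, %shifted1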

diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index a484dda23df9..efaeaacfbb08 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -3504,9 +3504,5 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 TargetLowering::AtomicExpansionKind
 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
-  const DataLayout &DL = AI->getModule()->getDataLayout();
-  unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
-  if (Size >= 4 && Size <= 8)
-    return AtomicExpansionKind::LLSC;
-  return AtomicExpansionKind::None;
+  return AtomicExpansionKind::LLSC;
 }

diff --git a/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll b/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll
new file mode 100644
index 000000000000..aedbd6101c4f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/atomic-rmw-add.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK: .LBB0_1:
+; CHECK:      [[R1:r[0-9]+]] = memw_locked(r0)
+; CHECK-DAG:  [[R2:r[0-9]+]] = and([[R1]],
+; CHECK-DAG:  [[R3:r[0-9]+]] = add([[R1]],
+; CHECK:      [[R2]] |= and([[R3]],
+; CHECK:      memw_locked(r0,[[P0:p[0-3]]]) = [[R2]]
+; CHECK:      if (![[P0]]) jump:nt .LBB0_1
+
+
+%struct.a = type { i8 }
+
+define void @b() #0 {
+  %d = alloca %struct.a
+  %c = getelementptr %struct.a, %struct.a* %d, i32 0, i32 0
+  atomicrmw add i8* %c, i8 2 monotonic
+  ret void
+}
+
+attributes #0 = { "target-cpu"="hexagonv66" }
+

diff --git a/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll
new file mode 100644
index 000000000000..3a52d8546b85
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/partword-cmpxchg.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK-LABEL: danny
+; CHECK: memw_locked
+define i8 @danny(i8* %a0) unnamed_addr #0 {
+start:
+  %v0 = cmpxchg i8* %a0, i8 0, i8 1 seq_cst seq_cst
+  %v1 = extractvalue { i8, i1 } %v0, 0
+  ret i8 %v1
+}
+
+; CHECK-LABEL: sammy
+; CHECK: memw_locked
+define i16 @sammy(i16* %a0) unnamed_addr #0 {
+start:
+  %v0 = cmpxchg i16* %a0, i16 0, i16 1 seq_cst seq_cst
+  %v1 = extractvalue { i16, i1 } %v0, 0
+  ret i16 %v1
+}
+
+; CHECK-LABEL: kirby
+; CHECK: memw_locked
+define i32 @kirby(i32* %a0) unnamed_addr #0 {
+start:
+  %v0 = cmpxchg i32* %a0, i32 0, i32 1 seq_cst seq_cst
+  %v1 = extractvalue { i32, i1 } %v0, 0
+  ret i32 %v1
+}

diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 5e84460b9c0c..39108874b7f9 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -257,12 +257,13 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
 ; CHECK: ret i8 [[LOADED]]
 
   %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
@@ -307,12 +308,13 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
 ; CHECK: ret i16 [[LOADED]]
 
   %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
@@ -328,9 +330,13 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
@@ -340,16 +346,19 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i32 [[OLDVAL]]
+; CHECK: ret i32 [[LOADED_EXIT]]
 
   %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
   %old = extractvalue { i32, i1 } %pairold, 0
@@ -371,9 +380,13 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
@@ -387,16 +400,19 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i64 [[OLDVAL]]
+; CHECK: ret i64 [[LOADED_EXIT]]
 
   %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
   %old = extractvalue { i64, i1 } %pairold, 0
@@ -411,9 +427,13 @@ define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
 ; CHECK: [[START]]:
 ; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
 ; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK:     [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ]
 ; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
 ; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]]
@@ -423,16 +443,19 @@ define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK:     [[LOADED_NO_STORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ]
 ; CHECK:     call void @llvm.arm.clrex()
 ; CHECK:     br label %[[FAILURE_BB]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK:     [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
+; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK:     ret i32 [[LOADED]]
+; CHECK:     ret i32 [[LOADED_EXIT]]
 
   %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
   %oldval = extractvalue { i32, i1 } %pair, 0

diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 8397182e7e8f..deecf01f2436 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -91,9 +91,13 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
@@ -104,16 +108,19 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED_EXIT:%.*]] = phi i8 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i8 [[OLDVAL]]
+; CHECK: ret i8 [[LOADED_EXIT]]
 
   %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
   %old = extractvalue { i8, i1 } %pairold, 0
@@ -129,9 +136,13 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
@@ -142,16 +153,20 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; The PHI is not required.
+; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED_EXIT:%.*]] = phi i16 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i16 [[OLDVAL]]
+; CHECK: ret i16 [[LOADED_EXIT]]
 
   %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
   %old = extractvalue { i16, i1 } %pairold, 0
@@ -166,9 +181,13 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
@@ -178,16 +197,19 @@ define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newva
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i32 [[OLDVAL]]
+; CHECK: ret i32 [[LOADED_EXIT]]
 
   %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
   %old = extractvalue { i32, i1 } %pairold, 0
@@ -209,9 +231,13 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
 ; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
 ; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
@@ -225,16 +251,19 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
+; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i64 [[OLDVAL]]
+; CHECK: ret i64 [[LOADED_EXIT]]
 
   %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
   %old = extractvalue { i64, i1 } %pairold, 0

diff --git a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index 375b41a26dbd..828c1c4cb3b7 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -15,6 +15,7 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK:     br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK:     [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ]
 ; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
 ; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
@@ -24,16 +25,19 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK:     [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ]
 ; CHECK:     call void @llvm.arm.clrex()
 ; CHECK:     br label %[[FAILURE_BB]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK:     [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ]
 ; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
+; CHECK:     [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK:     ret i32 [[LOADED]]
+; CHECK:     ret i32 [[LOADED_EXIT]]
 
   %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
   %oldval = extractvalue { i32, i1 } %pair, 0
@@ -87,9 +91,13 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK: [[START]]:
 ; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
 ; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK-NEXT: br label %[[TRY_STORE]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK:     [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ]
 ; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
 ; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
@@ -99,16 +107,19 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK:     [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ]
 ; CHECK:     call void @llvm.arm.clrex()
 ; CHECK:     br label %[[FAILURE_BB]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK:     [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ]
 ; CHECK-NOT: dmb
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
+; CHECK:     [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK:     ret i32 [[LOADED]]
+; CHECK:     ret i32 [[LOADED_EXIT]]
 
   %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
   %oldval = extractvalue { i32, i1 } %pair, 0
@@ -129,6 +140,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) min
 ; CHECK:     br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
+; CHECK:     [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ]
 ; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
 ; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
@@ -138,16 +150,19 @@ define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) min
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK:     [[LOADED_NOSTORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ]
 ; CHECK:     call void @llvm.arm.clrex()
 ; CHECK:     br label %[[FAILURE_BB]]
 
 ; CHECK: [[FAILURE_BB]]:
+; CHECK:     [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ], [ [[LOADED_TRYSTORE]], %[[TRY_STORE]] ]
 ; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
+; CHECK:     [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK:     ret i32 [[LOADED]]
+; CHECK:     ret i32 [[LOADED_EXIT]]
 
   %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
   %oldval = extractvalue { i32, i1 } %pair, 0

diff --git a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
index 74c05615d0b9..999fa1541f56 100644
--- a/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
+++ b/llvm/test/Transforms/AtomicExpand/SPARC/partword.ll
@@ -39,12 +39,12 @@ target triple = "sparcv9-unknown-unknown"
 ; CHECK:  %17 = icmp ne i32 %10, %16
 ; CHECK:  br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end
 ; CHECK:partword.cmpxchg.end:
-; CHECK:  %18 = lshr i32 %14, %ShiftAmt
-; CHECK:  %19 = trunc i32 %18 to i8
-; CHECK:  %20 = insertvalue { i8, i1 } undef, i8 %19, 0
-; CHECK:  %21 = insertvalue { i8, i1 } %20, i1 %15, 1
+; CHECK:  %shifted = lshr i32 %14, %ShiftAmt
+; CHECK:  %extracted = trunc i32 %shifted to i8
+; CHECK:  %18 = insertvalue { i8, i1 } undef, i8 %extracted, 0
+; CHECK:  %19 = insertvalue { i8, i1 } %18, i1 %15, 1
 ; CHECK:  fence seq_cst
-; CHECK:  %ret = extractvalue { i8, i1 } %21, 0
+; CHECK:  %ret = extractvalue { i8, i1 } %19, 0
 ; CHECK:  ret i8 %ret
 define i8 @test_cmpxchg_i8(i8* %arg, i8 %old, i8 %new) {
 entry:
@@ -84,12 +84,12 @@ entry:
 ; CHECK:  %17 = icmp ne i32 %10, %16
 ; CHECK:  br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end
 ; CHECK:partword.cmpxchg.end:
-; CHECK:  %18 = lshr i32 %14, %ShiftAmt
-; CHECK:  %19 = trunc i32 %18 to i16
-; CHECK:  %20 = insertvalue { i16, i1 } undef, i16 %19, 0
-; CHECK:  %21 = insertvalue { i16, i1 } %20, i1 %15, 1
+; CHECK:  %shifted = lshr i32 %14, %ShiftAmt
+; CHECK:  %extracted = trunc i32 %shifted to i16
+; CHECK:  %18 = insertvalue { i16, i1 } undef, i16 %extracted, 0
+; CHECK:  %19 = insertvalue { i16, i1 } %18, i1 %15, 1
 ; CHECK:  fence seq_cst
-; CHECK:  %ret = extractvalue { i16, i1 } %21, 0
+; CHECK:  %ret = extractvalue { i16, i1 } %19, 0
 ; CHECK:  ret i16 %ret
 define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) {
 entry:
@@ -125,10 +125,10 @@ entry:
 ; CHECK:  %newloaded = extractvalue { i32, i1 } %9, 0
 ; CHECK:  br i1 %success, label %atomicrmw.end, label %atomicrmw.start
 ; CHECK:atomicrmw.end:
-; CHECK:  %10 = lshr i32 %newloaded, %ShiftAmt
-; CHECK:  %11 = trunc i32 %10 to i16
+; CHECK:  %shifted = lshr i32 %newloaded, %ShiftAmt
+; CHECK:  %extracted = trunc i32 %shifted to i16
 ; CHECK:  fence seq_cst
-; CHECK:  ret i16 %11
+; CHECK:  ret i16 %extracted
 define i16 @test_add_i16(i16* %arg, i16 %val) {
 entry:
   %ret = atomicrmw add i16* %arg, i16 %val seq_cst
@@ -174,15 +174,15 @@ entry:
 
 ; CHECK-LABEL: @test_min_i16(
 ; CHECK:atomicrmw.start:
-; CHECK:  %6 = lshr i32 %loaded, %ShiftAmt
-; CHECK:  %7 = trunc i32 %6 to i16
-; CHECK:  %8 = icmp sle i16 %7, %val
-; CHECK:  %new = select i1 %8, i16 %7, i16 %val
-; CHECK:  %9 = zext i16 %new to i32
-; CHECK:  %10 = shl i32 %9, %ShiftAmt
-; CHECK:  %11 = and i32 %loaded, %Inv_Mask
-; CHECK:  %12 = or i32 %11, %10
-; CHECK:  %13 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %12 monotonic monotonic
+; CHECK:  %shifted = lshr i32 %loaded, %ShiftAmt
+; CHECK:  %extracted = trunc i32 %shifted to i16
+; CHECK:  %6 = icmp sle i16 %extracted, %val
+; CHECK:  %new = select i1 %6, i16 %extracted, i16 %val
+; CHECK:  %extended = zext i16 %new to i32
+; CHECK:  %shifted1 = shl nuw i32 %extended, %ShiftAmt
+; CHECK:  %unmasked = and i32 %loaded, %Inv_Mask
+; CHECK:  %inserted = or i32 %unmasked, %shifted1
+; CHECK:  %7 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %inserted monotonic monotonic
 ; CHECK:atomicrmw.end:
 define i16 @test_min_i16(i16* %arg, i16 %val) {
 entry:


        

