[clang] [clang][CodeGen] Emit atomic IR instead of libcalls for misaligned po… (PR #73176)

via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 21 11:03:48 PST 2023


https://github.com/Logikable updated https://github.com/llvm/llvm-project/pull/73176

>From 1451f9dbbb68bef6ebe02fd06cd9c7a129c31b1c Mon Sep 17 00:00:00 2001
From: Sean Luchen <seanluchen at google.com>
Date: Fri, 17 Nov 2023 17:29:52 +0000
Subject: [PATCH] [clang][CodeGen] Emit atomic IR in place of optimized
 libcalls.

This change reduces the information Clang uses to decide whether to emit
libcalls or atomic IR down to a single variable: the atomic's size.

Currently, the responsibility for handling atomics is split between Clang
and the backend. We'd like to move as much as possible to the backend,
since many of these decisions require target information. Centralizing the
logic there also helps avoid accidentally emitting both a libcall and a
native instruction for the same operation.
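
As a rough illustration (this mirrors the updated RISC-V test below; the
function name add64 is only for the example): on a 32-bit target without
native 64-bit atomics, Clang used to emit a sized libcall here, and with
this change it emits an atomicrmw that the backend may later expand back
into a libcall if needed.

  #include <stdatomic.h>
  #include <stdint.h>

  int64_t add64(_Atomic(int64_t) *a, int64_t b) {
    // Before: call i64 @__atomic_fetch_add_8(...)
    // After:  atomicrmw add ptr %a, i64 %b seq_cst, align 8
    return atomic_fetch_add(a, b);
  }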
---
 clang/lib/CodeGen/CGAtomic.cpp                | 315 +++---------------
 clang/test/CodeGen/LoongArch/atomics.c        |   6 +-
 clang/test/CodeGen/PowerPC/quadword-atomics.c |  12 +-
 clang/test/CodeGen/RISCV/riscv-atomics.c      |  45 ++-
 .../SystemZ/gnu-atomic-builtins-i128-8Al.c    | 129 +++----
 clang/test/CodeGen/arm-atomics-m.c            |   8 +-
 clang/test/CodeGen/arm-atomics-m0.c           |  16 +-
 clang/test/CodeGen/atomic-ops-libcall.c       | 119 ++++---
 clang/test/CodeGen/atomic-ops.c               |  27 +-
 clang/test/CodeGen/atomics-inlining.c         |  38 +--
 clang/test/CodeGen/c11atomics.c               |  30 +-
 clang/test/CodeGenCXX/atomic-inline.cpp       |   6 +-
 .../test/CodeGenOpenCL/atomic-ops-libcall.cl  |  57 ++--
 13 files changed, 280 insertions(+), 528 deletions(-)

diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 52e6ddb7d6afb0..bc57f3a65d3df1 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -811,29 +811,6 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
   Builder.SetInsertPoint(ContBB);
 }
 
-static void
-AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
-                  bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
-                  SourceLocation Loc, CharUnits SizeInChars) {
-  if (UseOptimizedLibcall) {
-    // Load value and pass it to the function directly.
-    CharUnits Align = CGF.getContext().getTypeAlignInChars(ValTy);
-    int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
-    ValTy =
-        CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false);
-    llvm::Type *ITy = llvm::IntegerType::get(CGF.getLLVMContext(), SizeInBits);
-    Address Ptr = Address(Val, ITy, Align);
-    Val = CGF.EmitLoadOfScalar(Ptr, false,
-                               CGF.getContext().getPointerType(ValTy),
-                               Loc);
-    // Coerce the value into an appropriately sized integer type.
-    Args.add(RValue::get(Val), ValTy);
-  } else {
-    // Non-optimized functions always take a reference.
-    Args.add(RValue::get(Val), CGF.getContext().VoidPtrTy);
-  }
-}
-
 RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
   QualType MemTy = AtomicTy;
@@ -857,22 +834,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   uint64_t Size = TInfo.Width.getQuantity();
   unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
 
-  bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
-  bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
-  bool UseLibcall = Misaligned | Oversized;
-  bool ShouldCastToIntPtrTy = true;
-
   CharUnits MaxInlineWidth =
       getContext().toCharUnitsFromBits(MaxInlineWidthInBits);
-
   DiagnosticsEngine &Diags = CGM.getDiags();
-
+  bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
+  bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
   if (Misaligned) {
     Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_misaligned)
         << (int)TInfo.Width.getQuantity()
         << (int)Ptr.getAlignment().getQuantity();
   }
-
   if (Oversized) {
     Diags.Report(E->getBeginLoc(), diag::warn_atomic_op_oversized)
         << (int)TInfo.Width.getQuantity() << (int)MaxInlineWidth.getQuantity();
@@ -881,6 +852,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   llvm::Value *Order = EmitScalarExpr(E->getOrder());
   llvm::Value *Scope =
       E->getScopeModel() ? EmitScalarExpr(E->getScope()) : nullptr;
+  bool ShouldCastToIntPtrTy = true;
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
@@ -1047,122 +1019,19 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       Dest = Atomics.castToAtomicIntPointer(Dest);
   }
 
-  // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
-  if (UseLibcall) {
-    bool UseOptimizedLibcall = false;
-    switch (E->getOp()) {
-    case AtomicExpr::AO__c11_atomic_init:
-    case AtomicExpr::AO__opencl_atomic_init:
-      llvm_unreachable("Already handled above with EmitAtomicInit!");
-
-    case AtomicExpr::AO__atomic_fetch_add:
-    case AtomicExpr::AO__atomic_fetch_and:
-    case AtomicExpr::AO__atomic_fetch_max:
-    case AtomicExpr::AO__atomic_fetch_min:
-    case AtomicExpr::AO__atomic_fetch_nand:
-    case AtomicExpr::AO__atomic_fetch_or:
-    case AtomicExpr::AO__atomic_fetch_sub:
-    case AtomicExpr::AO__atomic_fetch_xor:
-    case AtomicExpr::AO__atomic_add_fetch:
-    case AtomicExpr::AO__atomic_and_fetch:
-    case AtomicExpr::AO__atomic_max_fetch:
-    case AtomicExpr::AO__atomic_min_fetch:
-    case AtomicExpr::AO__atomic_nand_fetch:
-    case AtomicExpr::AO__atomic_or_fetch:
-    case AtomicExpr::AO__atomic_sub_fetch:
-    case AtomicExpr::AO__atomic_xor_fetch:
-    case AtomicExpr::AO__c11_atomic_fetch_add:
-    case AtomicExpr::AO__c11_atomic_fetch_and:
-    case AtomicExpr::AO__c11_atomic_fetch_max:
-    case AtomicExpr::AO__c11_atomic_fetch_min:
-    case AtomicExpr::AO__c11_atomic_fetch_nand:
-    case AtomicExpr::AO__c11_atomic_fetch_or:
-    case AtomicExpr::AO__c11_atomic_fetch_sub:
-    case AtomicExpr::AO__c11_atomic_fetch_xor:
-    case AtomicExpr::AO__hip_atomic_fetch_add:
-    case AtomicExpr::AO__hip_atomic_fetch_and:
-    case AtomicExpr::AO__hip_atomic_fetch_max:
-    case AtomicExpr::AO__hip_atomic_fetch_min:
-    case AtomicExpr::AO__hip_atomic_fetch_or:
-    case AtomicExpr::AO__hip_atomic_fetch_sub:
-    case AtomicExpr::AO__hip_atomic_fetch_xor:
-    case AtomicExpr::AO__opencl_atomic_fetch_add:
-    case AtomicExpr::AO__opencl_atomic_fetch_and:
-    case AtomicExpr::AO__opencl_atomic_fetch_max:
-    case AtomicExpr::AO__opencl_atomic_fetch_min:
-    case AtomicExpr::AO__opencl_atomic_fetch_or:
-    case AtomicExpr::AO__opencl_atomic_fetch_sub:
-    case AtomicExpr::AO__opencl_atomic_fetch_xor:
-    case AtomicExpr::AO__scoped_atomic_fetch_add:
-    case AtomicExpr::AO__scoped_atomic_fetch_and:
-    case AtomicExpr::AO__scoped_atomic_fetch_max:
-    case AtomicExpr::AO__scoped_atomic_fetch_min:
-    case AtomicExpr::AO__scoped_atomic_fetch_nand:
-    case AtomicExpr::AO__scoped_atomic_fetch_or:
-    case AtomicExpr::AO__scoped_atomic_fetch_sub:
-    case AtomicExpr::AO__scoped_atomic_fetch_xor:
-    case AtomicExpr::AO__scoped_atomic_add_fetch:
-    case AtomicExpr::AO__scoped_atomic_and_fetch:
-    case AtomicExpr::AO__scoped_atomic_max_fetch:
-    case AtomicExpr::AO__scoped_atomic_min_fetch:
-    case AtomicExpr::AO__scoped_atomic_nand_fetch:
-    case AtomicExpr::AO__scoped_atomic_or_fetch:
-    case AtomicExpr::AO__scoped_atomic_sub_fetch:
-    case AtomicExpr::AO__scoped_atomic_xor_fetch:
-      // For these, only library calls for certain sizes exist.
-      UseOptimizedLibcall = true;
-      break;
-
-    case AtomicExpr::AO__atomic_load:
-    case AtomicExpr::AO__atomic_store:
-    case AtomicExpr::AO__atomic_exchange:
-    case AtomicExpr::AO__atomic_compare_exchange:
-    case AtomicExpr::AO__scoped_atomic_load:
-    case AtomicExpr::AO__scoped_atomic_store:
-    case AtomicExpr::AO__scoped_atomic_exchange:
-    case AtomicExpr::AO__scoped_atomic_compare_exchange:
-      // Use the generic version if we don't know that the operand will be
-      // suitably aligned for the optimized version.
-      if (Misaligned)
-        break;
-      [[fallthrough]];
-    case AtomicExpr::AO__atomic_load_n:
-    case AtomicExpr::AO__atomic_store_n:
-    case AtomicExpr::AO__atomic_exchange_n:
-    case AtomicExpr::AO__atomic_compare_exchange_n:
-    case AtomicExpr::AO__c11_atomic_load:
-    case AtomicExpr::AO__c11_atomic_store:
-    case AtomicExpr::AO__c11_atomic_exchange:
-    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
-    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
-    case AtomicExpr::AO__hip_atomic_load:
-    case AtomicExpr::AO__hip_atomic_store:
-    case AtomicExpr::AO__hip_atomic_exchange:
-    case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
-    case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
-    case AtomicExpr::AO__opencl_atomic_load:
-    case AtomicExpr::AO__opencl_atomic_store:
-    case AtomicExpr::AO__opencl_atomic_exchange:
-    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
-    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
-    case AtomicExpr::AO__scoped_atomic_load_n:
-    case AtomicExpr::AO__scoped_atomic_store_n:
-    case AtomicExpr::AO__scoped_atomic_exchange_n:
-    case AtomicExpr::AO__scoped_atomic_compare_exchange_n:
-      // Only use optimized library calls for sizes for which they exist.
-      // FIXME: Size == 16 optimized library functions exist too.
-      if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
-        UseOptimizedLibcall = true;
-      break;
-    }
+  bool PowerOf2Size = (Size & (Size - 1)) == 0;
+  bool UseLibcall = !PowerOf2Size | (Size > 16);
 
+  // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/Library.
+  // Clang should never generate an optimized libcall -- it's better to let the
+  // backend handle it.
+  if (UseLibcall) {
     CallArgList Args;
-    if (!UseOptimizedLibcall) {
-      // For non-optimized library calls, the size is the first parameter
-      Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
-               getContext().getSizeType());
-    }
-    // Atomic address is the first or second parameter
+    // For non-optimized library calls, the size is the first parameter.
+    Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
+             getContext().getSizeType());
+
+    // The atomic address is the second parameter.
     // The OpenCL atomic library functions only accept pointer arguments to
     // generic address space.
     auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
@@ -1177,18 +1046,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       return getTargetHooks().performAddrSpaceCast(
           *this, V, AS, LangAS::opencl_generic, DestType, false);
     };
-
     Args.add(RValue::get(CastToGenericAddrSpace(Ptr.getPointer(),
                                                 E->getPtr()->getType())),
              getContext().VoidPtrTy);
 
+    // The next 1-3 parameters are op-dependent.
     std::string LibCallName;
-    QualType LoweredMemTy =
-      MemTy->isPointerType() ? getContext().getIntPtrType() : MemTy;
     QualType RetTy;
     bool HaveRetTy = false;
-    llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
-    bool PostOpMinMax = false;
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
     case AtomicExpr::AO__opencl_atomic_init:
@@ -1199,8 +1064,6 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // and exchange.
     // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
     //                                void *desired, int success, int failure)
-    // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
-    //                                  int success, int failure)
     case AtomicExpr::AO__atomic_compare_exchange:
     case AtomicExpr::AO__atomic_compare_exchange_n:
     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
@@ -1217,14 +1080,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(),
                                                   E->getVal1()->getType())),
                getContext().VoidPtrTy);
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
+      Args.add(RValue::get(CastToGenericAddrSpace(Val2.getPointer(),
+                                                  E->getVal2()->getType())),
+               getContext().VoidPtrTy);
       Args.add(RValue::get(Order), getContext().IntTy);
       Order = OrderFail;
       break;
     // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
     //                        int order)
-    // T __atomic_exchange_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_exchange:
     case AtomicExpr::AO__atomic_exchange_n:
     case AtomicExpr::AO__c11_atomic_exchange:
@@ -1233,11 +1096,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     case AtomicExpr::AO__scoped_atomic_exchange:
     case AtomicExpr::AO__scoped_atomic_exchange_n:
       LibCallName = "__atomic_exchange";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
+      Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(),
+                                                  E->getVal1()->getType())),
+               getContext().VoidPtrTy);
       break;
     // void __atomic_store(size_t size, void *mem, void *val, int order)
-    // void __atomic_store_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_store:
     case AtomicExpr::AO__atomic_store_n:
     case AtomicExpr::AO__c11_atomic_store:
@@ -1248,11 +1111,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       LibCallName = "__atomic_store";
       RetTy = getContext().VoidTy;
       HaveRetTy = true;
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
+      Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(),
+                                                  E->getVal1()->getType())),
+               getContext().VoidPtrTy);
       break;
     // void __atomic_load(size_t size, void *mem, void *return, int order)
-    // T __atomic_load_N(T *mem, int order)
     case AtomicExpr::AO__atomic_load:
     case AtomicExpr::AO__atomic_load_n:
     case AtomicExpr::AO__c11_atomic_load:
@@ -1262,183 +1125,85 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     case AtomicExpr::AO__scoped_atomic_load_n:
       LibCallName = "__atomic_load";
       break;
-    // T __atomic_add_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_add_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_add_fetch:
     case AtomicExpr::AO__scoped_atomic_add_fetch:
-      PostOp = llvm::Instruction::Add;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_add:
     case AtomicExpr::AO__c11_atomic_fetch_add:
     case AtomicExpr::AO__hip_atomic_fetch_add:
     case AtomicExpr::AO__opencl_atomic_fetch_add:
     case AtomicExpr::AO__scoped_atomic_fetch_add:
-      LibCallName = "__atomic_fetch_add";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_and_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_and_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_and_fetch:
     case AtomicExpr::AO__scoped_atomic_and_fetch:
-      PostOp = llvm::Instruction::And;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_and:
     case AtomicExpr::AO__c11_atomic_fetch_and:
     case AtomicExpr::AO__hip_atomic_fetch_and:
     case AtomicExpr::AO__opencl_atomic_fetch_and:
     case AtomicExpr::AO__scoped_atomic_fetch_and:
-      LibCallName = "__atomic_fetch_and";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_or_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_or_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_or_fetch:
     case AtomicExpr::AO__scoped_atomic_or_fetch:
-      PostOp = llvm::Instruction::Or;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_or:
     case AtomicExpr::AO__c11_atomic_fetch_or:
     case AtomicExpr::AO__hip_atomic_fetch_or:
     case AtomicExpr::AO__opencl_atomic_fetch_or:
     case AtomicExpr::AO__scoped_atomic_fetch_or:
-      LibCallName = "__atomic_fetch_or";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_sub_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_sub_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_sub_fetch:
     case AtomicExpr::AO__scoped_atomic_sub_fetch:
-      PostOp = llvm::Instruction::Sub;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_sub:
     case AtomicExpr::AO__c11_atomic_fetch_sub:
     case AtomicExpr::AO__hip_atomic_fetch_sub:
     case AtomicExpr::AO__opencl_atomic_fetch_sub:
     case AtomicExpr::AO__scoped_atomic_fetch_sub:
-      LibCallName = "__atomic_fetch_sub";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_xor_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_xor_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_xor_fetch:
     case AtomicExpr::AO__scoped_atomic_xor_fetch:
-      PostOp = llvm::Instruction::Xor;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_xor:
     case AtomicExpr::AO__c11_atomic_fetch_xor:
     case AtomicExpr::AO__hip_atomic_fetch_xor:
     case AtomicExpr::AO__opencl_atomic_fetch_xor:
     case AtomicExpr::AO__scoped_atomic_fetch_xor:
-      LibCallName = "__atomic_fetch_xor";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
+    case AtomicExpr::AO__atomic_nand_fetch:
+    case AtomicExpr::AO__atomic_fetch_nand:
+    case AtomicExpr::AO__c11_atomic_fetch_nand:
+    case AtomicExpr::AO__scoped_atomic_fetch_nand:
+    case AtomicExpr::AO__scoped_atomic_nand_fetch:
     case AtomicExpr::AO__atomic_min_fetch:
-    case AtomicExpr::AO__scoped_atomic_min_fetch:
-      PostOpMinMax = true;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_min:
     case AtomicExpr::AO__c11_atomic_fetch_min:
-    case AtomicExpr::AO__scoped_atomic_fetch_min:
     case AtomicExpr::AO__hip_atomic_fetch_min:
     case AtomicExpr::AO__opencl_atomic_fetch_min:
-      LibCallName = E->getValueType()->isSignedIntegerType()
-                        ? "__atomic_fetch_min"
-                        : "__atomic_fetch_umin";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
+    case AtomicExpr::AO__scoped_atomic_fetch_min:
+    case AtomicExpr::AO__scoped_atomic_min_fetch:
     case AtomicExpr::AO__atomic_max_fetch:
-    case AtomicExpr::AO__scoped_atomic_max_fetch:
-      PostOpMinMax = true;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_max:
     case AtomicExpr::AO__c11_atomic_fetch_max:
     case AtomicExpr::AO__hip_atomic_fetch_max:
     case AtomicExpr::AO__opencl_atomic_fetch_max:
     case AtomicExpr::AO__scoped_atomic_fetch_max:
-      LibCallName = E->getValueType()->isSignedIntegerType()
-                        ? "__atomic_fetch_max"
-                        : "__atomic_fetch_umax";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_nand_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_nand_N(T *mem, T val, int order)
-    case AtomicExpr::AO__atomic_nand_fetch:
-    case AtomicExpr::AO__scoped_atomic_nand_fetch:
-      PostOp = llvm::Instruction::And; // the NOT is special cased below
-      [[fallthrough]];
-    case AtomicExpr::AO__atomic_fetch_nand:
-    case AtomicExpr::AO__c11_atomic_fetch_nand:
-    case AtomicExpr::AO__scoped_atomic_fetch_nand:
-      LibCallName = "__atomic_fetch_nand";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
+    case AtomicExpr::AO__scoped_atomic_max_fetch:
+      llvm_unreachable("Integral atomic operations always become atomicrmw!");
     }
 
     if (E->isOpenCL()) {
-      LibCallName = std::string("__opencl") +
-          StringRef(LibCallName).drop_front(1).str();
-
+      LibCallName =
+          std::string("__opencl") + StringRef(LibCallName).drop_front(1).str();
     }
-    // Optimized functions have the size in their name.
-    if (UseOptimizedLibcall)
-      LibCallName += "_" + llvm::utostr(Size);
     // By default, assume we return a value of the atomic type.
     if (!HaveRetTy) {
-      if (UseOptimizedLibcall) {
-        // Value is returned directly.
-        // The function returns an appropriately sized integer type.
-        RetTy = getContext().getIntTypeForBitwidth(
-            getContext().toBits(TInfo.Width), /*Signed=*/false);
-      } else {
-        // Value is returned through parameter before the order.
-        RetTy = getContext().VoidTy;
-        Args.add(RValue::get(Dest.getPointer()), getContext().VoidPtrTy);
-      }
+      // Value is returned through parameter before the order.
+      RetTy = getContext().VoidTy;
+      Args.add(RValue::get(CastToGenericAddrSpace(Dest.getPointer(), RetTy)),
+               getContext().VoidPtrTy);
     }
-    // order is always the last parameter
+    // Order is always the last parameter.
     Args.add(RValue::get(Order),
              getContext().IntTy);
     if (E->isOpenCL())
       Args.add(RValue::get(Scope), getContext().IntTy);
 
-    // PostOp is only needed for the atomic_*_fetch operations, and
-    // thus is only needed for and implemented in the
-    // UseOptimizedLibcall codepath.
-    assert(UseOptimizedLibcall || (!PostOp && !PostOpMinMax));
-
     RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
     // The value is returned directly from the libcall.
     if (E->isCmpXChg())
       return Res;
 
-    // The value is returned directly for optimized libcalls but the expr
-    // provided an out-param.
-    if (UseOptimizedLibcall && Res.getScalarVal()) {
-      llvm::Value *ResVal = Res.getScalarVal();
-      if (PostOpMinMax) {
-        llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
-        ResVal = EmitPostAtomicMinMax(Builder, E->getOp(),
-                                      E->getValueType()->isSignedIntegerType(),
-                                      ResVal, LoadVal1);
-      } else if (PostOp) {
-        llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
-        ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
-      }
-      if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch ||
-          E->getOp() == AtomicExpr::AO__scoped_atomic_nand_fetch)
-        ResVal = Builder.CreateNot(ResVal);
-
-      Builder.CreateStore(ResVal, Dest.withElementType(ResVal->getType()));
-    }
-
     if (RValTy->isVoidType())
       return RValue::get(nullptr);
 
diff --git a/clang/test/CodeGen/LoongArch/atomics.c b/clang/test/CodeGen/LoongArch/atomics.c
index edc58d30db186d..0d0eb004ba638a 100644
--- a/clang/test/CodeGen/LoongArch/atomics.c
+++ b/clang/test/CodeGen/LoongArch/atomics.c
@@ -33,9 +33,9 @@ void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
 }
 
 void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
-  // LA32: call i64 @__atomic_load_8
-  // LA32: call void @__atomic_store_8
-  // LA32: call i64 @__atomic_fetch_add_8
+  // LA32: load atomic i64, ptr %a seq_cst, align 8
+  // LA32: store atomic i64 %b, ptr %a seq_cst, align 8
+  // LA32: atomicrmw add ptr %a, i64 %b seq_cst
   // LA64: load atomic i64, ptr %a seq_cst, align 8
   // LA64: store atomic i64 %b, ptr %a seq_cst, align 8
   // LA64: atomicrmw add ptr %a, i64 %b seq_cst
diff --git a/clang/test/CodeGen/PowerPC/quadword-atomics.c b/clang/test/CodeGen/PowerPC/quadword-atomics.c
index bff03b25d27ee9..834fa3159afd17 100644
--- a/clang/test/CodeGen/PowerPC/quadword-atomics.c
+++ b/clang/test/CodeGen/PowerPC/quadword-atomics.c
@@ -23,7 +23,7 @@ typedef __int128_t int128_t;
 // PPC64-QUADWORD-ATOMICS:    [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
 //
 // PPC64-LABEL: @test_load(
-// PPC64:    call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP3:%.*]], ptr noundef [[TMP4:%.*]], i32 noundef signext 2)
+// PPC64:    [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
 //
 Q test_load(AtomicQ *ptr) {
   // expected-no-diagnostics
@@ -34,7 +34,7 @@ Q test_load(AtomicQ *ptr) {
 // PPC64-QUADWORD-ATOMICS:    store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
 //
 // PPC64-LABEL: @test_store(
-// PPC64:    call void @__atomic_store(i64 noundef 16, ptr noundef [[TMP6:%.*]], ptr noundef [[TMP7:%.*]], i32 noundef signext 3)
+// PPC64:    store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
 //
 void test_store(Q val, AtomicQ *ptr) {
   // expected-no-diagnostics
@@ -45,7 +45,7 @@ void test_store(Q val, AtomicQ *ptr) {
 // PPC64-QUADWORD-ATOMICS:    [[TMP3:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
 //
 // PPC64-LABEL: @test_add(
-// PPC64:    [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(ptr noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
+// PPC64:    [[ATOMICRMW:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
 //
 void test_add(_Atomic(int128_t) *ptr, int128_t x) {
   // expected-no-diagnostics
@@ -56,7 +56,7 @@ void test_add(_Atomic(int128_t) *ptr, int128_t x) {
 // PPC64-QUADWORD-ATOMICS:    [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
 //
 // PPC64-LABEL: @test_xchg(
-// PPC64:    call void @__atomic_exchange(i64 noundef 16, ptr noundef [[TMP7:%.*]], ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], i32 noundef signext 5)
+// PPC64:    [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
 //
 Q test_xchg(AtomicQ *ptr, Q new) {
   // expected-no-diagnostics
@@ -67,7 +67,7 @@ Q test_xchg(AtomicQ *ptr, Q new) {
 // PPC64-QUADWORD-ATOMICS:    [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
 //
 // PPC64-LABEL: @test_cmpxchg(
-// PPC64:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
+// PPC64:    [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
 //
 int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
   // expected-no-diagnostics
@@ -78,7 +78,7 @@ int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
 // PPC64-QUADWORD-ATOMICS:    [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
 //
 // PPC64-LABEL: @test_cmpxchg_weak(
-// PPC64:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
+// PPC64:    [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
 //
 int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
   // expected-no-diagnostics
diff --git a/clang/test/CodeGen/RISCV/riscv-atomics.c b/clang/test/CodeGen/RISCV/riscv-atomics.c
index f629ad7d72ea82..03f34258cd58bd 100644
--- a/clang/test/CodeGen/RISCV/riscv-atomics.c
+++ b/clang/test/CodeGen/RISCV/riscv-atomics.c
@@ -7,22 +7,19 @@
 // RUN: %clang_cc1 -triple riscv64 -target-feature +a -O1 -emit-llvm %s -o - \
 // RUN:   | FileCheck %s -check-prefix=RV64IA
 
-// This test demonstrates that MaxAtomicInlineWidth is set appropriately when
-// the atomics instruction set extension is enabled.
-
 #include <stdatomic.h>
 #include <stdint.h>
 
 void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
-  // RV32I:  call zeroext i8 @__atomic_load_1
-  // RV32I:  call void @__atomic_store_1
-  // RV32I:  call zeroext i8 @__atomic_fetch_add_1
+  // RV32I: load atomic i8, ptr %a seq_cst, align 1
+  // RV32I: store atomic i8 %b, ptr %a seq_cst, align 1
+  // RV32I: atomicrmw add ptr %a, i8 %b seq_cst, align 1
   // RV32IA: load atomic i8, ptr %a seq_cst, align 1
   // RV32IA: store atomic i8 %b, ptr %a seq_cst, align 1
   // RV32IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
-  // RV64I:  call zeroext i8 @__atomic_load_1
-  // RV64I:  call void @__atomic_store_1
-  // RV64I:  call zeroext i8 @__atomic_fetch_add_1
+  // RV64I: load atomic i8, ptr %a seq_cst, align 1
+  // RV64I: store atomic i8 %b, ptr %a seq_cst, align 1
+  // RV64I: atomicrmw add ptr %a, i8 %b seq_cst, align 1
   // RV64IA: load atomic i8, ptr %a seq_cst, align 1
   // RV64IA: store atomic i8 %b, ptr %a seq_cst, align 1
   // RV64IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
@@ -32,15 +29,15 @@ void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
 }
 
 void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
-  // RV32I:  call i32 @__atomic_load_4
-  // RV32I:  call void @__atomic_store_4
-  // RV32I:  call i32 @__atomic_fetch_add_4
+  // RV32I: load atomic i32, ptr %a seq_cst, align 4
+  // RV32I: store atomic i32 %b, ptr %a seq_cst, align 4
+  // RV32I: atomicrmw add ptr %a, i32 %b seq_cst, align 4
   // RV32IA: load atomic i32, ptr %a seq_cst, align 4
   // RV32IA: store atomic i32 %b, ptr %a seq_cst, align 4
   // RV32IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
-  // RV64I:  call signext i32 @__atomic_load_4
-  // RV64I:  call void @__atomic_store_4
-  // RV64I:  call signext i32 @__atomic_fetch_add_4
+  // RV64I: load atomic i32, ptr %a seq_cst, align 4
+  // RV64I: store atomic i32 %b, ptr %a seq_cst, align 4
+  // RV64I: atomicrmw add ptr %a, i32 %b seq_cst, align 4
   // RV64IA: load atomic i32, ptr %a seq_cst, align 4
   // RV64IA: store atomic i32 %b, ptr %a seq_cst, align 4
   // RV64IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
@@ -50,15 +47,15 @@ void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
 }
 
 void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
-  // RV32I:  call i64 @__atomic_load_8
-  // RV32I:  call void @__atomic_store_8
-  // RV32I:  call i64 @__atomic_fetch_add_8
-  // RV32IA: call i64 @__atomic_load_8
-  // RV32IA: call void @__atomic_store_8
-  // RV32IA: call i64 @__atomic_fetch_add_8
-  // RV64I:  call i64 @__atomic_load_8
-  // RV64I:  call void @__atomic_store_8
-  // RV64I:  call i64 @__atomic_fetch_add_8
+  // RV32I: load atomic i64, ptr %a seq_cst, align 8
+  // RV32I: store atomic i64 %b, ptr %a seq_cst, align 8
+  // RV32I: atomicrmw add ptr %a, i64 %b seq_cst, align 8
+  // RV32IA: load atomic i64, ptr %a seq_cst, align 8
+  // RV32IA: store atomic i64 %b, ptr %a seq_cst, align 8
+  // RV32IA: atomicrmw add ptr %a, i64 %b seq_cst, align 8
+  // RV64I: load atomic i64, ptr %a seq_cst, align 8
+  // RV64I: store atomic i64 %b, ptr %a seq_cst, align 8
+  // RV64I: atomicrmw add ptr %a, i64 %b seq_cst, align 8
   // RV64IA: load atomic i64, ptr %a seq_cst, align 8
   // RV64IA: store atomic i64 %b, ptr %a seq_cst, align 8
   // RV64IA: atomicrmw add ptr %a, i64 %b seq_cst, align 8
diff --git a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
index e38e6572bd58f4..8759df7b19c638 100644
--- a/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
+++ b/clang/test/CodeGen/SystemZ/gnu-atomic-builtins-i128-8Al.c
@@ -20,10 +20,8 @@ __int128 Des;
 
 // CHECK-LABEL: @f1(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    call void @__atomic_load(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull [[ATOMIC_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 8, !tbaa [[TBAA2:![0-9]+]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8
+// CHECK-NEXT:    store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 __int128 f1() {
@@ -32,8 +30,8 @@ __int128 f1() {
 
 // CHECK-LABEL: @f2(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @__atomic_load(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull @Ret, i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Ret, align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load atomic i128, ptr @Ptr seq_cst, align 8
+// CHECK-NEXT:    store i128 [[TMP0]], ptr @Ret, align 8
 // CHECK-NEXT:    store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -44,10 +42,8 @@ __int128 f2() {
 
 // CHECK-LABEL: @f3(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTATOMICTMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[DOTATOMICTMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_store(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull [[DOTATOMICTMP]], i32 noundef signext 5)
+// CHECK-NEXT:    store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 8
 // CHECK-NEXT:    ret void
 //
 void f3() {
@@ -56,7 +52,8 @@ void f3() {
 
 // CHECK-LABEL: @f4(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @__atomic_store(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull @Val, i32 noundef signext 5)
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8
+// CHECK-NEXT:    store atomic i128 [[TMP0]], ptr @Ptr seq_cst, align 8
 // CHECK-NEXT:    ret void
 //
 void f4() {
@@ -65,12 +62,8 @@ void f4() {
 
 // CHECK-LABEL: @f5(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTATOMICTMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[DOTATOMICTMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_exchange(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull [[DOTATOMICTMP]], ptr noundef nonnull [[ATOMIC_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -80,9 +73,10 @@ __int128 f5() {
 
 // CHECK-LABEL: @f6(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    tail call void @__atomic_exchange(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull @Val, ptr noundef nonnull @Ret, i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Ret, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xchg ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
+// CHECK-NEXT:    store i128 [[TMP1]], ptr @Ret, align 8
+// CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
 __int128 f6() {
@@ -92,11 +86,17 @@ __int128 f6() {
 
 // CHECK-LABEL: @f7(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DOTATOMICTMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Des, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[DOTATOMICTMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull @Exp, ptr noundef nonnull [[DOTATOMICTMP]], i32 noundef signext 5, i32 noundef signext 5)
-// CHECK-NEXT:    ret i1 [[CALL]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr @Exp, align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP1]], i128 [[TMP0]] seq_cst seq_cst, align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
+// CHECK-NEXT:    br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK:       cmpxchg.store_expected:
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+// CHECK-NEXT:    store i128 [[TMP4]], ptr @Exp, align 8
+// CHECK-NEXT:    br label [[CMPXCHG_CONTINUE]]
+// CHECK:       cmpxchg.continue:
+// CHECK-NEXT:    ret i1 [[TMP3]]
 //
 _Bool f7() {
   return __atomic_compare_exchange_n(&Ptr, &Exp, Des, 0,
@@ -105,8 +105,17 @@ _Bool f7() {
 
 // CHECK-LABEL: @f8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[CALL:%.*]] = tail call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef nonnull @Ptr, ptr noundef nonnull @Exp, ptr noundef nonnull @Des, i32 noundef signext 5, i32 noundef signext 5)
-// CHECK-NEXT:    ret i1 [[CALL]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Exp, align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr @Des, align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = cmpxchg ptr @Ptr, i128 [[TMP0]], i128 [[TMP1]] seq_cst seq_cst, align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i128, i1 } [[TMP2]], 1
+// CHECK-NEXT:    br i1 [[TMP3]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// CHECK:       cmpxchg.store_expected:
+// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i128, i1 } [[TMP2]], 0
+// CHECK-NEXT:    store i128 [[TMP4]], ptr @Exp, align 8
+// CHECK-NEXT:    br label [[CMPXCHG_CONTINUE]]
+// CHECK:       cmpxchg.continue:
+// CHECK-NEXT:    ret i1 [[TMP3]]
 //
 _Bool f8() {
   return __atomic_compare_exchange(&Ptr, &Exp, &Des, 0,
@@ -115,12 +124,8 @@ _Bool f8() {
 
 // CHECK-LABEL: @f9(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_add_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = add i128 [[TMP1]], [[TMP0]]
 // CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
@@ -131,12 +136,8 @@ __int128 f9() {
 
 // CHECK-LABEL: @f10(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_sub_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = sub i128 [[TMP1]], [[TMP0]]
 // CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
@@ -147,12 +148,8 @@ __int128 f10() {
 
 // CHECK-LABEL: @f11(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_and_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
 // CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
@@ -163,12 +160,8 @@ __int128 f11() {
 
 // CHECK-LABEL: @f12(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_xor_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = xor i128 [[TMP1]], [[TMP0]]
 // CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
@@ -179,12 +172,8 @@ __int128 f12() {
 
 // CHECK-LABEL: @f13(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_or_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = or i128 [[TMP1]], [[TMP0]]
 // CHECK-NEXT:    store i128 [[TMP2]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
@@ -195,12 +184,8 @@ __int128 f13() {
 
 // CHECK-LABEL: @f14(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_nand_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    [[TMP2:%.*]] = and i128 [[TMP1]], [[TMP0]]
 // CHECK-NEXT:    [[TMP3:%.*]] = xor i128 [[TMP2]], -1
 // CHECK-NEXT:    store i128 [[TMP3]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
@@ -212,12 +197,8 @@ __int128 f14() {
 
 // CHECK-LABEL: @f15(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_add_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw add ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -227,12 +208,8 @@ __int128 f15() {
 
 // CHECK-LABEL: @f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_sub_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw sub ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -242,12 +219,8 @@ __int128 f16() {
 
 // CHECK-LABEL: @f17(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_and_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw and ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -257,12 +230,8 @@ __int128 f17() {
 
 // CHECK-LABEL: @f18(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_xor_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw xor ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -272,12 +241,8 @@ __int128 f18() {
 
 // CHECK-LABEL: @f19(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_or_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw or ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
@@ -287,12 +252,8 @@ __int128 f19() {
 
 // CHECK-LABEL: @f20(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP:%.*]] = alloca i128, align 8
-// CHECK-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i128, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = load i128, ptr @Val, align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    store i128 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 8, !tbaa [[TBAA2]]
-// CHECK-NEXT:    call void @__atomic_fetch_nand_16(ptr nonnull sret(i128) align 8 [[TMP]], ptr noundef nonnull @Ptr, ptr noundef nonnull [[INDIRECT_ARG_TEMP]], i32 noundef signext 5)
-// CHECK-NEXT:    [[TMP1:%.*]] = load i128, ptr [[TMP]], align 8, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw nand ptr @Ptr, i128 [[TMP0]] seq_cst, align 8
 // CHECK-NEXT:    store i128 [[TMP1]], ptr [[AGG_RESULT:%.*]], align 8, !tbaa [[TBAA2]]
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/CodeGen/arm-atomics-m.c b/clang/test/CodeGen/arm-atomics-m.c
index b9cc72bc6b98ab..6087fd9d6a66ae 100644
--- a/clang/test/CodeGen/arm-atomics-m.c
+++ b/clang/test/CodeGen/arm-atomics-m.c
@@ -22,14 +22,14 @@ void test_presence(void)
   r = 0;
   __atomic_store(&i, &r, memory_order_seq_cst);
 
-  // CHECK: __atomic_fetch_add_8
+  // CHECK: atomicrmw add ptr {{.*}} seq_cst, align 8
   __atomic_fetch_add(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_fetch_sub_8
+  // CHECK: atomicrmw sub ptr {{.*}} seq_cst, align 8
   __atomic_fetch_sub(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_load_8
+  // CHECK: load atomic i64, ptr {{.*}} seq_cst, align 8
   long long rl;
   __atomic_load(&l, &rl, memory_order_seq_cst);
-  // CHECK: __atomic_store_8
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} seq_cst, align 8
   rl = 0;
   __atomic_store(&l, &rl, memory_order_seq_cst);
 }
diff --git a/clang/test/CodeGen/arm-atomics-m0.c b/clang/test/CodeGen/arm-atomics-m0.c
index 335a1d2711f808..94e344cf608df4 100644
--- a/clang/test/CodeGen/arm-atomics-m0.c
+++ b/clang/test/CodeGen/arm-atomics-m0.c
@@ -11,25 +11,25 @@ typedef enum memory_order {
 void test_presence(void)
 {
   // CHECK-LABEL: @test_presence
-  // CHECK: __atomic_fetch_add_4
+  // CHECK: atomicrmw add ptr {{.*}} seq_cst, align 4
   __atomic_fetch_add(&i, 1, memory_order_seq_cst);
-  // CHECK: __atomic_fetch_sub_4
+  // CHECK: atomicrmw sub {{.*}} seq_cst, align 4
   __atomic_fetch_sub(&i, 1, memory_order_seq_cst);
-  // CHECK: __atomic_load_4
+  // CHECK: load atomic i32, ptr {{.*}} seq_cst, align 4
   int r;
   __atomic_load(&i, &r, memory_order_seq_cst);
-  // CHECK: __atomic_store_4
+  // CHECK: store atomic i32 {{.*}}, ptr {{.*}} seq_cst, align 4
   r = 0;
   __atomic_store(&i, &r, memory_order_seq_cst);
 
-  // CHECK: __atomic_fetch_add_8
+  // CHECK: atomicrmw add {{.*}} seq_cst, align 8
   __atomic_fetch_add(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_fetch_sub_8
+  // CHECK: atomicrmw sub {{.*}} seq_cst, align 8
   __atomic_fetch_sub(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_load_8
+  // CHECK: load atomic i64, ptr {{.*}} seq_cst, align 8
   long long rl;
   __atomic_load(&l, &rl, memory_order_seq_cst);
-  // CHECK: __atomic_store_8
+  // CHECK: store atomic i64 {{.*}}, ptr {{.*}} seq_cst, align 8
   rl = 0;
   __atomic_store(&l, &rl, memory_order_seq_cst);
 }
diff --git a/clang/test/CodeGen/atomic-ops-libcall.c b/clang/test/CodeGen/atomic-ops-libcall.c
index 745ccd22bf33f0..2ec456a1a7a606 100644
--- a/clang/test/CodeGen/atomic-ops-libcall.c
+++ b/clang/test/CodeGen/atomic-ops-libcall.c
@@ -1,7 +1,4 @@
-// RUN: %clang_cc1 < %s -triple armv5e-none-linux-gnueabi -emit-llvm -O1 | FileCheck %s
-
-// FIXME: This file should not be checking -O1 output.
-// Ie, it is testing many IR optimizer passes as part of front-end verification.
+// RUN: %clang_cc1 < %s -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck %s
 
 enum memory_order {
   memory_order_relaxed, memory_order_consume, memory_order_acquire,
@@ -9,112 +6,142 @@ enum memory_order {
 };
 
 int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) {
-  // CHECK: test_c11_atomic_fetch_add_int_ptr
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 12, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_add_int_ptr
+  // CHECK: store i32 12, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
 }
 
 int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) {
-  // CHECK: test_c11_atomic_fetch_sub_int_ptr
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 20, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_sub_int_ptr
+  // CHECK: store i32 20, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
 }
 
 int test_c11_atomic_fetch_add_int(_Atomic(int) *p) {
-  // CHECK: test_c11_atomic_fetch_add_int
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 3, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_add_int
+  // CHECK: store i32 3, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
 }
 
 int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) {
-  // CHECK: test_c11_atomic_fetch_sub_int
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 5, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_sub_int
+  // CHECK: store i32 5, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
 }
 
 int *fp2a(int **p) {
-  // CHECK: @fp2a
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 4, i32 noundef 0)
+  // CHECK-LABEL: define{{.*}} @fp2a
+  // CHECK: store i32 4, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] monotonic, align 4
   // Note, the GNU builtins do not multiply by sizeof(T)!
   return __atomic_fetch_sub(p, 4, memory_order_relaxed);
 }
 
 int test_atomic_fetch_add(int *p) {
-  // CHECK: test_atomic_fetch_add
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_add
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_add(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_sub(int *p) {
-  // CHECK: test_atomic_fetch_sub
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_sub
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_sub(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_and(int *p) {
-  // CHECK: test_atomic_fetch_and
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_and_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_and
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw and ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_and(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_or(int *p) {
-  // CHECK: test_atomic_fetch_or
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_or_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_or
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw or ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_or(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_xor(int *p) {
-  // CHECK: test_atomic_fetch_xor
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_xor_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_xor
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw xor ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_xor(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_nand(int *p) {
-  // CHECK: test_atomic_fetch_nand
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_nand_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_nand
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw nand ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_nand(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_add_fetch(int *p) {
-  // CHECK: test_atomic_add_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_add_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], [[TMP1]]
   return __atomic_add_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_sub_fetch(int *p) {
-  // CHECK: test_atomic_sub_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], -55
+  // CHECK-LABEL: define{{.*}} @test_atomic_sub_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = sub i32 [[CALL]], [[TMP1]]
   return __atomic_sub_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_and_fetch(int *p) {
-  // CHECK: test_atomic_and_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_and_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = and i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_and_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw and ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = and i32 [[CALL]], [[TMP1]]
   return __atomic_and_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_or_fetch(int *p) {
-  // CHECK: test_atomic_or_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_or_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = or i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_or_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw or ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = or i32 [[CALL]], [[TMP1]]
   return __atomic_or_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_xor_fetch(int *p) {
-  // CHECK: test_atomic_xor_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_xor_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_xor_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw xor ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], [[TMP1]]
   return __atomic_xor_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_nand_fetch(int *p) {
-  // CHECK: test_atomic_nand_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_nand_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // FIXME: We should not be checking optimized IR. It changes independently of clang.
-  // FIXME-CHECK: [[AND:%[^ ]*]] = and i32 [[CALL]], 55
-  // FIXME-CHECK: {{%[^ ]*}} = xor i32 [[AND]], -1
+  // CHECK-LABEL: define{{.*}} @test_atomic_nand_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw nand ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: [[TMP2:%[^ ]*]] = and i32 [[CALL]], [[TMP1]]
+  // CHECK: {{%[^ ]*}} = xor i32 [[TMP2]], -1
   return __atomic_nand_fetch(p, 55, memory_order_seq_cst);
 }
diff --git a/clang/test/CodeGen/atomic-ops.c b/clang/test/CodeGen/atomic-ops.c
index 9ac05d270b97c5..b6060dcc540f90 100644
--- a/clang/test/CodeGen/atomic-ops.c
+++ b/clang/test/CodeGen/atomic-ops.c
@@ -198,7 +198,8 @@ struct S implicit_load(_Atomic(struct S) *a) {
 struct S fd1(struct S *a) {
   // CHECK-LABEL: @fd1
   // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
-  // CHECK: call void @__atomic_load(i32 noundef 8, ptr noundef {{.*}}, ptr noundef [[RETVAL]], i32 noundef 5)
+  // CHECK: [[TMP1:%.*]] = load atomic i64, ptr {{%.*}} seq_cst, align 4
+  // CHECK-NEXT: store i64 [[TMP1]], ptr [[RETVAL]], align 4
   // CHECK: ret
   struct S ret;
   __atomic_load(a, &ret, memory_order_seq_cst);
@@ -213,7 +214,8 @@ void fd2(struct S *a, struct S *b) {
   // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-  // CHECK-NEXT: call void @__atomic_store(i32 noundef 8, ptr noundef [[LOAD_A_PTR]], ptr noundef [[LOAD_B_PTR]],
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+  // CHECK-NEXT: store atomic i64 [[LOAD_B]], ptr [[LOAD_A_PTR]] seq_cst, align 4
   // CHECK-NEXT: ret void
   __atomic_store(a, b, memory_order_seq_cst);
 }
@@ -229,7 +231,9 @@ void fd3(struct S *a, struct S *b, struct S *c) {
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-  // CHECK-NEXT: call void @__atomic_exchange(i32 noundef 8, ptr noundef [[LOAD_A_PTR]], ptr noundef [[LOAD_B_PTR]], ptr noundef [[LOAD_C_PTR]],
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+  // CHECK-NEXT: [[RESULT:%.*]] = atomicrmw xchg ptr [[LOAD_A_PTR]], i64 [[LOAD_B]] seq_cst, align 4
+  // CHECK-NEXT: store i64 [[RESULT]], ptr [[LOAD_C_PTR]], align 4
 
   __atomic_exchange(a, b, c, memory_order_seq_cst);
 }
@@ -245,8 +249,9 @@ _Bool fd4(struct S *a, struct S *b, struct S *c) {
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-  // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 8, ptr noundef [[LOAD_A_PTR]], ptr noundef [[LOAD_B_PTR]], ptr noundef [[LOAD_C_PTR]],
-  // CHECK-NEXT: ret i1 [[CALL]]
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+  // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, ptr [[LOAD_C_PTR]], align 4
+  // CHECK-NEXT: {{.*}} = cmpxchg weak ptr [[LOAD_A_PTR]], i64 [[LOAD_B]], i64 [[LOAD_C]] seq_cst seq_cst, align 4
   return __atomic_compare_exchange(a, b, c, 1, 5, 5);
 }
 
@@ -682,13 +687,13 @@ void test_underaligned(void) {
   // CHECK-LABEL: @test_underaligned
   struct Underaligned { char c[8]; } underaligned_a, underaligned_b, underaligned_c;
 
-  // CHECK: call void @__atomic_load(i32 noundef 8,
+  // CHECK: load atomic i64, {{.*}}, align 1
   __atomic_load(&underaligned_a, &underaligned_b, memory_order_seq_cst);
-  // CHECK: call void @__atomic_store(i32 noundef 8,
+  // CHECK: store atomic i64 {{.*}}, align 1
   __atomic_store(&underaligned_a, &underaligned_b, memory_order_seq_cst);
-  // CHECK: call void @__atomic_exchange(i32 noundef 8,
+  // CHECK: atomicrmw xchg ptr {{.*}}, align 1
   __atomic_exchange(&underaligned_a, &underaligned_b, &underaligned_c, memory_order_seq_cst);
-  // CHECK: call {{.*}} @__atomic_compare_exchange(i32 noundef 8,
+  // CHECK: cmpxchg weak ptr {{.*}}, align 1
   __atomic_compare_exchange(&underaligned_a, &underaligned_b, &underaligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
 
   __attribute__((aligned)) struct Underaligned aligned_a, aligned_b, aligned_c;
@@ -747,7 +752,7 @@ void test_minmax_postop(int *si, unsigned *ui, unsigned short *us, signed char *
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
   // CHECK: store i32 [[NEW]], ptr
   *si = __atomic_min_fetch(si, 42, memory_order_release);
-  
+
   // CHECK: [[OLD:%.*]] = atomicrmw umax ptr [[PTR:%.*]], i32 [[RHS:%.*]] release, align 4
   // CHECK: [[TST:%.*]] = icmp ugt i32 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i32 [[OLD]], i32 [[RHS]]
@@ -772,7 +777,7 @@ void test_minmax_postop(int *si, unsigned *ui, unsigned short *us, signed char *
   // CHECK: store i8 [[NEW]], ptr
   *sc = __atomic_min_fetch(sc, 42, memory_order_release);
 
-  // CHECK: [[OLD:%.*]] = call i64 @__atomic_fetch_umin_8(ptr noundef {{%.*}}, i64 noundef [[RHS:%.*]],
+  // CHECK: [[OLD:%.*]] = atomicrmw umin ptr {{%.*}}, i64 [[RHS:%.*]] release, align 4
   // CHECK: [[TST:%.*]] = icmp ult i64 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i64 [[OLD]], i64 [[RHS]]
   // CHECK: store i64 [[NEW]], ptr
diff --git a/clang/test/CodeGen/atomics-inlining.c b/clang/test/CodeGen/atomics-inlining.c
index 862c63076b2dc0..217a294ee84abc 100644
--- a/clang/test/CodeGen/atomics-inlining.c
+++ b/clang/test/CodeGen/atomics-inlining.c
@@ -38,14 +38,14 @@ void test1(void) {
   (void)__atomic_store(&a1, &a2, memory_order_seq_cst);
 
 // ARM-LABEL: define{{.*}} void @test1
-// ARM: = call{{.*}} zeroext i8 @__atomic_load_1(ptr noundef @c1
-// ARM: call{{.*}} void @__atomic_store_1(ptr noundef @c1, i8 noundef zeroext
-// ARM: = call{{.*}} zeroext i16 @__atomic_load_2(ptr noundef @s1
-// ARM: call{{.*}} void @__atomic_store_2(ptr noundef @s1, i16 noundef zeroext
-// ARM: = call{{.*}} i32 @__atomic_load_4(ptr noundef @i1
-// ARM: call{{.*}} void @__atomic_store_4(ptr noundef @i1, i32 noundef
-// ARM: = call{{.*}} i64 @__atomic_load_8(ptr noundef @ll1
-// ARM: call{{.*}} void @__atomic_store_8(ptr noundef @ll1, i64 noundef
+// ARM: = load atomic i8, ptr @c1 seq_cst, align 1
+// ARM: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1
+// ARM: = load atomic i16, ptr @s1 seq_cst, align 2
+// ARM: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
+// ARM: = load atomic i32, ptr @i1 seq_cst, align 4
+// ARM: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
+// ARM: = load atomic i64, ptr @ll1 seq_cst, align 8
+// ARM: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // ARM: call{{.*}} void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // ARM: call{{.*}} void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 
@@ -56,8 +56,8 @@ void test1(void) {
 // PPC32: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
 // PPC32: = load atomic i32, ptr @i1 seq_cst, align 4
 // PPC32: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
-// PPC32: = call i64 @__atomic_load_8(ptr noundef @ll1
-// PPC32: call void @__atomic_store_8(ptr noundef @ll1, i64
+// PPC32: = load atomic i64, ptr @ll1 seq_cst, align 8
+// PPC32: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // PPC32: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // PPC32: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 
@@ -80,8 +80,8 @@ void test1(void) {
 // MIPS32: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
 // MIPS32: = load atomic i32, ptr @i1 seq_cst, align 4
 // MIPS32: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
-// MIPS32: call i64 @__atomic_load_8(ptr noundef @ll1
-// MIPS32: call void @__atomic_store_8(ptr noundef @ll1, i64
+// MIPS32: = load atomic i64, ptr @ll1 seq_cst, align 8
+// MIPS32: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // MIPS32: call void @__atomic_load(i32 noundef signext 100, ptr noundef @a1, ptr noundef @a2
 // MIPS32: call void @__atomic_store(i32 noundef signext 100, ptr noundef @a1, ptr noundef @a2
 
@@ -94,7 +94,7 @@ void test1(void) {
 // MIPS64: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
 // MIPS64: = load atomic i64, ptr @ll1 seq_cst, align 8
 // MIPS64: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
-// MIPS64: call void @__atomic_load(i64 noundef zeroext 100, ptr noundef @a1
+// MIPS64: call void @__atomic_load(i64 noundef zeroext 100, ptr noundef @a1, ptr noundef @a2
 // MIPS64: call void @__atomic_store(i64 noundef zeroext 100, ptr noundef @a1, ptr noundef @a2
 
 // SPARC-LABEL: define{{.*}} void @test1
@@ -104,12 +104,12 @@ void test1(void) {
 // SPARC: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
 // SPARC: = load atomic i32, ptr @i1 seq_cst, align 4
 // SPARC: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
-// SPARCV8: call i64 @__atomic_load_8(ptr noundef @ll1
-// SPARCV8: call void @__atomic_store_8(ptr noundef @ll1, i64
-// SPARCV9: load atomic i64, ptr @ll1 seq_cst, align 8
-// SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
+// SPARC: load atomic i64, ptr @ll1 seq_cst, align 8
+// SPARC: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
+// SPARCV9: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2
+// SPARCV9: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2
 
 // NVPTX-LABEL: define{{.*}} void @test1
 // NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1
@@ -120,7 +120,7 @@ void test1(void) {
 // NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
 // NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8
 // NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
-// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
-// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2
+// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2
 
 }
diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
index 773ed41991f783..c5a554b39f61a1 100644
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@@ -343,10 +343,9 @@ PS test_promoted_load(_Atomic(PS) *addr) {
   // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
   // CHECK:   store ptr %addr, ptr [[ADDR_ARG]], align 4
   // CHECK:   [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
-  // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_load_8(ptr noundef [[ADDR]], i32 noundef 5)
-  // CHECK:   store i64 [[RES]], ptr [[ATOMIC_RES]], align 8
-  // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
-
+  // CHECK:   [[ATOMIC_RES:%.*]] = load atomic i64, ptr [[ADDR]] seq_cst, align 8
+  // CHECK:   store i64 [[ATOMIC_RES]], ptr [[ATOMIC_RES_ADDR:%.*]], align 8
+  // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES_ADDR]], i32 6, i1 false)
   return __c11_atomic_load(addr, 5);
 }
 
@@ -362,8 +361,8 @@ void test_promoted_store(_Atomic(PS) *addr, PS *val) {
   // CHECK:   [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
-  // CHECK:   [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 2
-  // CHECK:   call arm_aapcscc void @__atomic_store_8(ptr noundef [[ADDR]], i64 noundef [[VAL64]], i32 noundef 5)
+  // CHECK:   [[ATOMIC:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
+  // CHECK:   store atomic i64 [[ATOMIC]], ptr [[ADDR]] seq_cst, align 8
   __c11_atomic_store(addr, *val, 5);
 }
 
@@ -380,10 +379,10 @@ PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
   // CHECK:   [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
-  // CHECK:   [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 2
-  // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_exchange_8(ptr noundef [[ADDR]], i64 noundef [[VAL64]], i32 noundef 5)
-  // CHECK:   store i64 [[RES]], ptr [[ATOMIC_RES]], align 8
-  // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
+  // CHECK:   [[ATOMIC:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 8
+  // CHECK:   [[ATOMIC_RES:%.*]] = atomicrmw xchg ptr [[ADDR]], i64 [[ATOMIC]] seq_cst, align 8
+  // CHECK:   store i64 [[ATOMIC_RES]], ptr [[ATOMIC_RES_PTR:%.*]], align 8
+  // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES_PTR]], i32 6, i1 false)
   return __c11_atomic_exchange(addr, *val, 5);
 }
 
@@ -404,9 +403,10 @@ _Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[NEW]], i32 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_DESIRED]], ptr align 2 [[DESIRED]], i64 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_NEW]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
-  // CHECK:   [[NEW64:%.*]] = load i64, ptr [[ATOMIC_NEW]], align 2
-  // CHECK:   [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange_8(ptr noundef [[ADDR]], ptr noundef [[ATOMIC_DESIRED]], i64 noundef [[NEW64]], i32 noundef 5, i32 noundef 5)
-  // CHECK:   ret i1 [[RES]]
+  // CHECK:   [[VAL1:%.*]] = load i64, ptr [[ATOMIC_DESIRED]], align 8
+  // CHECK:   [[VAL2:%.*]] = load i64, ptr [[ATOMIC_NEW]], align 8
+  // CHECK:   [[RES_PAIR:%.*]] = cmpxchg ptr [[ADDR]], i64 [[VAL1]], i64 [[VAL2]] seq_cst seq_cst, align 8
+  // CHECK:   [[RES:%.*]] = extractvalue { i64, i1 } [[RES_PAIR]], 1
   return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
 
@@ -414,12 +414,12 @@ struct Empty {};
 
 struct Empty test_empty_struct_load(_Atomic(struct Empty)* empty) {
   // CHECK-LABEL: @test_empty_struct_load(
-  // CHECK: call arm_aapcscc zeroext i8 @__atomic_load_1(ptr noundef %{{.*}}, i32 noundef 5)
+  // CHECK: load atomic i8, ptr {{.*}}, align 1
   return __c11_atomic_load(empty, 5);
 }
 
 void test_empty_struct_store(_Atomic(struct Empty)* empty, struct Empty value) {
   // CHECK-LABEL: @test_empty_struct_store(
-  // CHECK: call arm_aapcscc void @__atomic_store_1(ptr noundef %{{.*}}, i8 noundef zeroext %{{.*}}, i32 noundef 5)
+  // CHECK: store atomic i8 {{.*}}, ptr {{.*}}, align 1
   __c11_atomic_store(empty, value, 5);
 }
diff --git a/clang/test/CodeGenCXX/atomic-inline.cpp b/clang/test/CodeGenCXX/atomic-inline.cpp
index 701bbd57b485c7..c8fa877a37beb5 100644
--- a/clang/test/CodeGenCXX/atomic-inline.cpp
+++ b/clang/test/CodeGenCXX/atomic-inline.cpp
@@ -42,7 +42,7 @@ AM16 m16;
 AM16 load16() {
   AM16 am;
   // CHECK-LABEL: @_Z6load16v
-  // CHECK: call void @__atomic_load
+  // CHECK: load atomic i128, {{.*}} monotonic, align 16
   // CORE2-LABEL: @_Z6load16v
   // CORE2: load atomic i128, {{.*}} monotonic, align 16
   __atomic_load(&m16, &am, 0);
@@ -52,7 +52,7 @@ AM16 load16() {
 AM16 s16;
 void store16() {
   // CHECK-LABEL: @_Z7store16v
-  // CHECK: call void @__atomic_store
+  // CHECK: store atomic i128 {{.*}} monotonic, align 16
   // CORE2-LABEL: @_Z7store16v
   // CORE2: store atomic i128 {{.*}} monotonic, align 16
   __atomic_store(&m16, &s16, 0);
@@ -61,7 +61,7 @@ void store16() {
 bool cmpxchg16() {
   AM16 am;
   // CHECK-LABEL: @_Z9cmpxchg16v
-  // CHECK: call noundef zeroext i1 @__atomic_compare_exchange
+  // CHECK: cmpxchg ptr {{.*}} monotonic monotonic, align 16
   // CORE2-LABEL: @_Z9cmpxchg16v
   // CORE2: cmpxchg ptr {{.*}} monotonic monotonic, align 16
   return __atomic_compare_exchange(&m16, &s16, &am, 0, 0, 0);
diff --git a/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl b/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
index 2f020c21082124..d615ff6bec4140 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
@@ -20,63 +20,60 @@ typedef enum memory_scope {
 
 void f(atomic_int *i, global atomic_int *gi, local atomic_int *li, private atomic_int *pi, atomic_uint *ui, int cmp, int order, int scope) {
   int x;
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(ptr noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: load atomic i32, ptr addrspace(4) {{.*}} seq_cst, align 4
+  // ARM: load atomic i32, ptr {{.*}} seq_cst, align 4
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: store atomic i32 {{.*}}, ptr addrspace(4) {{.*}} seq_cst, align 4
+  // ARM: store atomic i32 {{.*}}, ptr {{.*}} seq_cst, align 4
   __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: %[[GP:[0-9]+]] = addrspacecast ptr addrspace(1) {{%[0-9]+}} to ptr addrspace(4)
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef %[[GP]], i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: store atomic i32 {{.*}}, ptr addrspace(1) {{.*}} seq_cst, align 4
+  // ARM: store atomic i32 {{.*}}, ptr {{.*}} seq_cst, align 4
   __opencl_atomic_store(gi, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: %[[GP:[0-9]+]] = addrspacecast ptr addrspace(3) {{%[0-9]+}} to ptr addrspace(4)
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef %[[GP]], i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: store atomic i32 {{.*}}, ptr addrspace(3) {{.*}} seq_cst, align 4
+  // ARM: store atomic i32 {{.*}}, ptr {{.*}} seq_cst, align 4
   __opencl_atomic_store(li, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: %[[GP:[0-9]+]] = addrspacecast ptr {{%[0-9]+}} to ptr addrspace(4)
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef %[[GP]], i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: store atomic i32 {{.*}}, ptr {{.*}} seq_cst, align 4
+  // ARM: store atomic i32 {{.*}}, ptr {{.*}} seq_cst, align 4
   __opencl_atomic_store(pi, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: atomicrmw add ptr addrspace(4) {{.*}}, i32 {{.*}} seq_cst, align 4
+  // ARM: atomicrmw add ptr {{.*}}, i32 {{.*}} seq_cst, align 4
   x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: atomicrmw min ptr addrspace(4) {{.*}}, i32 {{.*}} seq_cst, align 4
+  // ARM: atomicrmw min ptr {{.*}}, i32 {{.*}} seq_cst, align 4
   x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: atomicrmw umin ptr addrspace(4) {{.*}}, i32 {{.*}} seq_cst, align 4
+  // ARM: atomicrmw umin ptr {{.*}}, i32 {{.*}} seq_cst, align 4
   x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
+  // SPIR: cmpxchg ptr addrspace(4) {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
+  // ARM: cmpxchg ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
   x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
+  // SPIR: cmpxchg weak ptr addrspace(4) {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
+  // ARM: cmpxchg weak ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 2)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 2)
+  // SPIR: cmpxchg weak ptr addrspace(4) {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
+  // ARM: cmpxchg weak ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_device);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 3)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 3)
+  // SPIR: cmpxchg weak ptr addrspace(4) {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
+  // ARM: cmpxchg weak ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_all_svm_devices);
 
 #ifdef cl_khr_subgroups
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 4)
+  // SPIR: cmpxchg weak ptr addrspace(4) {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);
 #endif
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
+  // SPIR: cmpxchg weak ptr addrspace(4) {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
+  // ARM: cmpxchg weak ptr {{.*}}, i32 {{.*}}, i32 {{.*}} seq_cst seq_cst, align 4
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, order, order, scope);
 }


