[clang] [clang][CodeGen] Emit atomic IR instead of libcalls for misaligned po… (PR #73176)

via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 1 09:49:17 PST 2023


https://github.com/Logikable updated https://github.com/llvm/llvm-project/pull/73176

From 5d4b6cac10d84bf8bf76d50730fdd5ef65261076 Mon Sep 17 00:00:00 2001
From: Sean Luchen <seanluchen at google.com>
Date: Fri, 17 Nov 2023 17:29:52 +0000
Subject: [PATCH] [clang][CodeGen] Emit atomic IR instead of libcalls for
 misaligned pointers.

Calling __atomic_fetch_op_n is undefined for misaligned pointers.

Since the backend can handle atomic IR on misaligned pointers, emit that
instead. To keep things simple, we make this change for all misaligned
operations, not just the integral ones.
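
For illustration, a misaligned atomic affected by this change (a sketch;
the packed struct is just one way to obtain a misaligned lvalue, and the
exact lowering depends on the target):

  struct __attribute__((packed)) S {
    char c;
    int i; // 1-byte aligned, 4 bytes wide
  };

  int add_one(struct S *s) {
    // Before: lowered to a call to __atomic_fetch_add_4, which assumes a
    // suitably aligned pointer. After: lowered to an atomicrmw add with
    // the real (under-)alignment, which the backend can expand as needed.
    return __atomic_fetch_add(&s->i, 1, __ATOMIC_SEQ_CST);
  }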

There is an additional consequence of this change. Previously, libcalls
were emitted for misaligned, misshapen (size != 2^n), and oversized
objects. Since optimized libcalls only operate on 2^n-sized objects,
removing the misaligned case means optimized libcalls will never be
emitted, and all relevant codepaths can be cleaned up.
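
For reference, the generic libcalls take the object size explicitly and
work through pointers, while the optimized ones are specialized by size
(declarations paraphrased from the GCC Atomic/GCCMM/Library design):

  // Generic: any size, any alignment; always available in libatomic.
  void __atomic_load(size_t size, void *mem, void *ret, int order);
  void __atomic_store(size_t size, void *mem, void *val, int order);

  // Optimized: only for power-of-two sizes N and suitably aligned
  // operands, e.g. (no longer emitted by clang after this change):
  //   T __atomic_load_N(T *mem, int order);
  //   T __atomic_fetch_add_N(T *mem, T val, int order);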

A simple correctness test is to have one thread perform an arithmetic
operation on a misaligned integer, and another thread perform a
non-arithmetic operation (e.g. xchg) on the same value. Such a test
exhibits incorrect behaviour currently.
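
A minimal sketch of such a test (assuming pthreads; the packed struct and
iteration counts are illustrative):

  #include <pthread.h>

  struct __attribute__((packed)) P { char pad; int v; } p;

  static void *adder(void *arg) {             // arithmetic op
    for (int i = 0; i < 1000000; ++i)
      __atomic_fetch_add(&p.v, 1, __ATOMIC_SEQ_CST);
    return 0;
  }

  static void *swapper(void *arg) {           // non-arithmetic op (xchg)
    for (int i = 0; i < 1000000; ++i)
      (void)__atomic_exchange_n(&p.v, 0, __ATOMIC_SEQ_CST);
    return 0;
  }

  int main(void) {
    pthread_t t1, t2;
    pthread_create(&t1, 0, adder, 0);
    pthread_create(&t2, 0, swapper, 0);
    pthread_join(t1, 0);
    pthread_join(t2, 0);
    return 0;
  }
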
---
 clang/lib/CodeGen/CGAtomic.cpp                | 336 ++++++------------
 clang/test/CodeGen/LoongArch/atomics.c        |   6 +-
 clang/test/CodeGen/PowerPC/quadword-atomics.c |   2 +-
 clang/test/CodeGen/RISCV/riscv-atomics.c      |  42 +--
 clang/test/CodeGen/arm-atomics-m.c            |   8 +-
 clang/test/CodeGen/arm-atomics-m0.c           |  16 +-
 clang/test/CodeGen/atomic-ops-libcall.c       | 119 ++++---
 clang/test/CodeGen/atomic-ops.c               |  25 +-
 clang/test/CodeGen/atomics-inlining.c         |  28 +-
 clang/test/CodeGen/c11atomics.c               |  18 +-
 .../test/CodeGenOpenCL/atomic-ops-libcall.cl  |  54 +--
 11 files changed, 273 insertions(+), 381 deletions(-)

diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 6005d5c51c0e1ac..c082590e1084df2 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -785,27 +785,75 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
   Builder.SetInsertPoint(ContBB);
 }
 
-static void
-AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
-                  bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
-                  SourceLocation Loc, CharUnits SizeInChars) {
-  if (UseOptimizedLibcall) {
-    // Load value and pass it to the function directly.
-    CharUnits Align = CGF.getContext().getTypeAlignInChars(ValTy);
-    int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
-    ValTy =
-        CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false);
-    llvm::Type *ITy = llvm::IntegerType::get(CGF.getLLVMContext(), SizeInBits);
-    Address Ptr = Address(Val, ITy, Align);
-    Val = CGF.EmitLoadOfScalar(Ptr, false,
-                               CGF.getContext().getPointerType(ValTy),
-                               Loc);
-    // Coerce the value into an appropriately sized integer type.
-    Args.add(RValue::get(Val), ValTy);
-  } else {
-    // Non-optimized functions always take a reference.
-    Args.add(RValue::get(Val), CGF.getContext().VoidPtrTy);
+static bool hasUnoptimizedLibcall(AtomicExpr::AtomicOp op) {
+  switch (op) {
+  case AtomicExpr::AO__c11_atomic_init:
+  case AtomicExpr::AO__opencl_atomic_init:
+  case AtomicExpr::AO__atomic_compare_exchange:
+  case AtomicExpr::AO__atomic_compare_exchange_n:
+  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+  case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+  case AtomicExpr::AO__atomic_exchange:
+  case AtomicExpr::AO__atomic_exchange_n:
+  case AtomicExpr::AO__c11_atomic_exchange:
+  case AtomicExpr::AO__hip_atomic_exchange:
+  case AtomicExpr::AO__opencl_atomic_exchange:
+  case AtomicExpr::AO__atomic_store:
+  case AtomicExpr::AO__atomic_store_n:
+  case AtomicExpr::AO__c11_atomic_store:
+  case AtomicExpr::AO__hip_atomic_store:
+  case AtomicExpr::AO__opencl_atomic_store:
+  case AtomicExpr::AO__atomic_load:
+  case AtomicExpr::AO__atomic_load_n:
+  case AtomicExpr::AO__c11_atomic_load:
+  case AtomicExpr::AO__hip_atomic_load:
+  case AtomicExpr::AO__opencl_atomic_load:
+    return true;
+  case AtomicExpr::AO__atomic_add_fetch:
+  case AtomicExpr::AO__atomic_fetch_add:
+  case AtomicExpr::AO__c11_atomic_fetch_add:
+  case AtomicExpr::AO__hip_atomic_fetch_add:
+  case AtomicExpr::AO__opencl_atomic_fetch_add:
+  case AtomicExpr::AO__atomic_and_fetch:
+  case AtomicExpr::AO__atomic_fetch_and:
+  case AtomicExpr::AO__c11_atomic_fetch_and:
+  case AtomicExpr::AO__hip_atomic_fetch_and:
+  case AtomicExpr::AO__opencl_atomic_fetch_and:
+  case AtomicExpr::AO__atomic_or_fetch:
+  case AtomicExpr::AO__atomic_fetch_or:
+  case AtomicExpr::AO__c11_atomic_fetch_or:
+  case AtomicExpr::AO__hip_atomic_fetch_or:
+  case AtomicExpr::AO__opencl_atomic_fetch_or:
+  case AtomicExpr::AO__atomic_sub_fetch:
+  case AtomicExpr::AO__atomic_fetch_sub:
+  case AtomicExpr::AO__c11_atomic_fetch_sub:
+  case AtomicExpr::AO__hip_atomic_fetch_sub:
+  case AtomicExpr::AO__opencl_atomic_fetch_sub:
+  case AtomicExpr::AO__atomic_xor_fetch:
+  case AtomicExpr::AO__atomic_fetch_xor:
+  case AtomicExpr::AO__c11_atomic_fetch_xor:
+  case AtomicExpr::AO__hip_atomic_fetch_xor:
+  case AtomicExpr::AO__opencl_atomic_fetch_xor:
+  case AtomicExpr::AO__atomic_nand_fetch:
+  case AtomicExpr::AO__atomic_fetch_nand:
+  case AtomicExpr::AO__c11_atomic_fetch_nand:
+  case AtomicExpr::AO__atomic_min_fetch:
+  case AtomicExpr::AO__atomic_fetch_min:
+  case AtomicExpr::AO__c11_atomic_fetch_min:
+  case AtomicExpr::AO__hip_atomic_fetch_min:
+  case AtomicExpr::AO__opencl_atomic_fetch_min:
+  case AtomicExpr::AO__atomic_max_fetch:
+  case AtomicExpr::AO__atomic_fetch_max:
+  case AtomicExpr::AO__c11_atomic_fetch_max:
+  case AtomicExpr::AO__hip_atomic_fetch_max:
+  case AtomicExpr::AO__opencl_atomic_fetch_max:
+    return false;
   }
+  llvm_unreachable("All atomic ops should be handled!");
 }
 
 RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
@@ -833,7 +881,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
 
   bool Oversized = getContext().toBits(TInfo.Width) > MaxInlineWidthInBits;
   bool Misaligned = (Ptr.getAlignment() % TInfo.Width) != 0;
-  bool UseLibcall = Misaligned | Oversized;
+  bool PowerOf2Size = (Size & (Size - 1)) == 0;
+  bool UseLibcall =
+      (!PowerOf2Size | Oversized) & hasUnoptimizedLibcall(E->getOp());
   bool ShouldCastToIntPtrTy = true;
 
   CharUnits MaxInlineWidth =
@@ -994,98 +1044,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       Dest = Atomics.castToAtomicIntPointer(Dest);
   }
 
-  // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
+  // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/Library.
+  // Clang should never generate an optimized libcall -- it's better to let
+  // the backend handle it.
   if (UseLibcall) {
-    bool UseOptimizedLibcall = false;
-    switch (E->getOp()) {
-    case AtomicExpr::AO__c11_atomic_init:
-    case AtomicExpr::AO__opencl_atomic_init:
-      llvm_unreachable("Already handled above with EmitAtomicInit!");
-
-    case AtomicExpr::AO__atomic_fetch_add:
-    case AtomicExpr::AO__atomic_fetch_and:
-    case AtomicExpr::AO__atomic_fetch_max:
-    case AtomicExpr::AO__atomic_fetch_min:
-    case AtomicExpr::AO__atomic_fetch_nand:
-    case AtomicExpr::AO__atomic_fetch_or:
-    case AtomicExpr::AO__atomic_fetch_sub:
-    case AtomicExpr::AO__atomic_fetch_xor:
-    case AtomicExpr::AO__atomic_add_fetch:
-    case AtomicExpr::AO__atomic_and_fetch:
-    case AtomicExpr::AO__atomic_max_fetch:
-    case AtomicExpr::AO__atomic_min_fetch:
-    case AtomicExpr::AO__atomic_nand_fetch:
-    case AtomicExpr::AO__atomic_or_fetch:
-    case AtomicExpr::AO__atomic_sub_fetch:
-    case AtomicExpr::AO__atomic_xor_fetch:
-    case AtomicExpr::AO__c11_atomic_fetch_add:
-    case AtomicExpr::AO__c11_atomic_fetch_and:
-    case AtomicExpr::AO__c11_atomic_fetch_max:
-    case AtomicExpr::AO__c11_atomic_fetch_min:
-    case AtomicExpr::AO__c11_atomic_fetch_nand:
-    case AtomicExpr::AO__c11_atomic_fetch_or:
-    case AtomicExpr::AO__c11_atomic_fetch_sub:
-    case AtomicExpr::AO__c11_atomic_fetch_xor:
-    case AtomicExpr::AO__hip_atomic_fetch_add:
-    case AtomicExpr::AO__hip_atomic_fetch_and:
-    case AtomicExpr::AO__hip_atomic_fetch_max:
-    case AtomicExpr::AO__hip_atomic_fetch_min:
-    case AtomicExpr::AO__hip_atomic_fetch_or:
-    case AtomicExpr::AO__hip_atomic_fetch_sub:
-    case AtomicExpr::AO__hip_atomic_fetch_xor:
-    case AtomicExpr::AO__opencl_atomic_fetch_add:
-    case AtomicExpr::AO__opencl_atomic_fetch_and:
-    case AtomicExpr::AO__opencl_atomic_fetch_max:
-    case AtomicExpr::AO__opencl_atomic_fetch_min:
-    case AtomicExpr::AO__opencl_atomic_fetch_or:
-    case AtomicExpr::AO__opencl_atomic_fetch_sub:
-    case AtomicExpr::AO__opencl_atomic_fetch_xor:
-      // For these, only library calls for certain sizes exist.
-      UseOptimizedLibcall = true;
-      break;
-
-    case AtomicExpr::AO__atomic_load:
-    case AtomicExpr::AO__atomic_store:
-    case AtomicExpr::AO__atomic_exchange:
-    case AtomicExpr::AO__atomic_compare_exchange:
-      // Use the generic version if we don't know that the operand will be
-      // suitably aligned for the optimized version.
-      if (Misaligned)
-        break;
-      [[fallthrough]];
-    case AtomicExpr::AO__atomic_load_n:
-    case AtomicExpr::AO__atomic_store_n:
-    case AtomicExpr::AO__atomic_exchange_n:
-    case AtomicExpr::AO__atomic_compare_exchange_n:
-    case AtomicExpr::AO__c11_atomic_load:
-    case AtomicExpr::AO__c11_atomic_store:
-    case AtomicExpr::AO__c11_atomic_exchange:
-    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
-    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
-    case AtomicExpr::AO__hip_atomic_load:
-    case AtomicExpr::AO__hip_atomic_store:
-    case AtomicExpr::AO__hip_atomic_exchange:
-    case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
-    case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
-    case AtomicExpr::AO__opencl_atomic_load:
-    case AtomicExpr::AO__opencl_atomic_store:
-    case AtomicExpr::AO__opencl_atomic_exchange:
-    case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
-    case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
-      // Only use optimized library calls for sizes for which they exist.
-      // FIXME: Size == 16 optimized library functions exist too.
-      if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
-        UseOptimizedLibcall = true;
-      break;
-    }
-
     CallArgList Args;
-    if (!UseOptimizedLibcall) {
-      // For non-optimized library calls, the size is the first parameter
-      Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
-               getContext().getSizeType());
-    }
-    // Atomic address is the first or second parameter
+    // For non-optimized library calls, the size is the first parameter.
+    Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
+             getContext().getSizeType());
+
+    // The atomic address is the second parameter.
     // The OpenCL atomic library functions only accept pointer arguments to
     // generic address space.
     auto CastToGenericAddrSpace = [&](llvm::Value *V, QualType PT) {
@@ -1100,18 +1068,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       return getTargetHooks().performAddrSpaceCast(
           *this, V, AS, LangAS::opencl_generic, DestType, false);
     };
-
     Args.add(RValue::get(CastToGenericAddrSpace(Ptr.getPointer(),
                                                 E->getPtr()->getType())),
              getContext().VoidPtrTy);
 
+    // The next 1-3 parameters are op-dependent.
     std::string LibCallName;
-    QualType LoweredMemTy =
-      MemTy->isPointerType() ? getContext().getIntPtrType() : MemTy;
     QualType RetTy;
     bool HaveRetTy = false;
-    llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;
-    bool PostOpMinMax = false;
+
     switch (E->getOp()) {
     case AtomicExpr::AO__c11_atomic_init:
     case AtomicExpr::AO__opencl_atomic_init:
@@ -1122,8 +1087,6 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     // and exchange.
     // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
     //                                void *desired, int success, int failure)
-    // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
-    //                                  int success, int failure)
     case AtomicExpr::AO__atomic_compare_exchange:
     case AtomicExpr::AO__atomic_compare_exchange_n:
     case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
@@ -1138,25 +1101,25 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(),
                                                   E->getVal1()->getType())),
                getContext().VoidPtrTy);
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
+      Args.add(RValue::get(CastToGenericAddrSpace(Val2.getPointer(),
+                                                  E->getVal2()->getType())),
+               getContext().VoidPtrTy);
       Args.add(RValue::get(Order), getContext().IntTy);
       Order = OrderFail;
       break;
     // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
     //                        int order)
-    // T __atomic_exchange_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_exchange:
     case AtomicExpr::AO__atomic_exchange_n:
     case AtomicExpr::AO__c11_atomic_exchange:
     case AtomicExpr::AO__hip_atomic_exchange:
     case AtomicExpr::AO__opencl_atomic_exchange:
       LibCallName = "__atomic_exchange";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
+      Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(),
+                                                  E->getVal1()->getType())),
+               getContext().VoidPtrTy);
       break;
     // void __atomic_store(size_t size, void *mem, void *val, int order)
-    // void __atomic_store_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_store:
     case AtomicExpr::AO__atomic_store_n:
     case AtomicExpr::AO__c11_atomic_store:
@@ -1165,11 +1128,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
       LibCallName = "__atomic_store";
       RetTy = getContext().VoidTy;
       HaveRetTy = true;
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
+      Args.add(RValue::get(CastToGenericAddrSpace(Val1.getPointer(),
+                                                  E->getVal1()->getType())),
+               getContext().VoidPtrTy);
       break;
     // void __atomic_load(size_t size, void *mem, void *return, int order)
-    // T __atomic_load_N(T *mem, int order)
     case AtomicExpr::AO__atomic_load:
     case AtomicExpr::AO__atomic_load_n:
     case AtomicExpr::AO__c11_atomic_load:
@@ -1177,166 +1140,69 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
     case AtomicExpr::AO__opencl_atomic_load:
       LibCallName = "__atomic_load";
       break;
-    // T __atomic_add_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_add_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_add_fetch:
-      PostOp = llvm::Instruction::Add;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_add:
     case AtomicExpr::AO__c11_atomic_fetch_add:
     case AtomicExpr::AO__hip_atomic_fetch_add:
     case AtomicExpr::AO__opencl_atomic_fetch_add:
-      LibCallName = "__atomic_fetch_add";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_and_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_and_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_and_fetch:
-      PostOp = llvm::Instruction::And;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_and:
     case AtomicExpr::AO__c11_atomic_fetch_and:
     case AtomicExpr::AO__hip_atomic_fetch_and:
     case AtomicExpr::AO__opencl_atomic_fetch_and:
-      LibCallName = "__atomic_fetch_and";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_or_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_or_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_or_fetch:
-      PostOp = llvm::Instruction::Or;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_or:
     case AtomicExpr::AO__c11_atomic_fetch_or:
     case AtomicExpr::AO__hip_atomic_fetch_or:
     case AtomicExpr::AO__opencl_atomic_fetch_or:
-      LibCallName = "__atomic_fetch_or";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_sub_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_sub_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_sub_fetch:
-      PostOp = llvm::Instruction::Sub;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_sub:
     case AtomicExpr::AO__c11_atomic_fetch_sub:
     case AtomicExpr::AO__hip_atomic_fetch_sub:
     case AtomicExpr::AO__opencl_atomic_fetch_sub:
-      LibCallName = "__atomic_fetch_sub";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_xor_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_xor_N(T *mem, T val, int order)
     case AtomicExpr::AO__atomic_xor_fetch:
-      PostOp = llvm::Instruction::Xor;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_xor:
     case AtomicExpr::AO__c11_atomic_fetch_xor:
     case AtomicExpr::AO__hip_atomic_fetch_xor:
     case AtomicExpr::AO__opencl_atomic_fetch_xor:
-      LibCallName = "__atomic_fetch_xor";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
+    case AtomicExpr::AO__atomic_nand_fetch:
+    case AtomicExpr::AO__atomic_fetch_nand:
+    case AtomicExpr::AO__c11_atomic_fetch_nand:
     case AtomicExpr::AO__atomic_min_fetch:
-      PostOpMinMax = true;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_min:
     case AtomicExpr::AO__c11_atomic_fetch_min:
     case AtomicExpr::AO__hip_atomic_fetch_min:
     case AtomicExpr::AO__opencl_atomic_fetch_min:
-      LibCallName = E->getValueType()->isSignedIntegerType()
-                        ? "__atomic_fetch_min"
-                        : "__atomic_fetch_umin";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
     case AtomicExpr::AO__atomic_max_fetch:
-      PostOpMinMax = true;
-      [[fallthrough]];
     case AtomicExpr::AO__atomic_fetch_max:
     case AtomicExpr::AO__c11_atomic_fetch_max:
     case AtomicExpr::AO__hip_atomic_fetch_max:
     case AtomicExpr::AO__opencl_atomic_fetch_max:
-      LibCallName = E->getValueType()->isSignedIntegerType()
-                        ? "__atomic_fetch_max"
-                        : "__atomic_fetch_umax";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        LoweredMemTy, E->getExprLoc(), TInfo.Width);
-      break;
-    // T __atomic_nand_fetch_N(T *mem, T val, int order)
-    // T __atomic_fetch_nand_N(T *mem, T val, int order)
-    case AtomicExpr::AO__atomic_nand_fetch:
-      PostOp = llvm::Instruction::And; // the NOT is special cased below
-      [[fallthrough]];
-    case AtomicExpr::AO__atomic_fetch_nand:
-    case AtomicExpr::AO__c11_atomic_fetch_nand:
-      LibCallName = "__atomic_fetch_nand";
-      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
-                        MemTy, E->getExprLoc(), TInfo.Width);
-      break;
+      llvm_unreachable("Integral atomic operations always become atomicrmw!");
     }
 
     if (E->isOpenCL()) {
-      LibCallName = std::string("__opencl") +
-          StringRef(LibCallName).drop_front(1).str();
-
+      LibCallName =
+          std::string("__opencl") + StringRef(LibCallName).drop_front(1).str();
     }
-    // Optimized functions have the size in their name.
-    if (UseOptimizedLibcall)
-      LibCallName += "_" + llvm::utostr(Size);
     // By default, assume we return a value of the atomic type.
     if (!HaveRetTy) {
-      if (UseOptimizedLibcall) {
-        // Value is returned directly.
-        // The function returns an appropriately sized integer type.
-        RetTy = getContext().getIntTypeForBitwidth(
-            getContext().toBits(TInfo.Width), /*Signed=*/false);
-      } else {
-        // Value is returned through parameter before the order.
-        RetTy = getContext().VoidTy;
-        Args.add(RValue::get(Dest.getPointer()), getContext().VoidPtrTy);
-      }
+      // Value is returned through parameter before the order.
+      RetTy = getContext().VoidTy;
+      Args.add(RValue::get(CastToGenericAddrSpace(Dest.getPointer(), RetTy)),
+               getContext().VoidPtrTy);
     }
-    // order is always the last parameter
+    // Order is always the last parameter.
     Args.add(RValue::get(Order),
              getContext().IntTy);
     if (E->isOpenCL())
       Args.add(RValue::get(Scope), getContext().IntTy);
 
-    // PostOp is only needed for the atomic_*_fetch operations, and
-    // thus is only needed for and implemented in the
-    // UseOptimizedLibcall codepath.
-    assert(UseOptimizedLibcall || (!PostOp && !PostOpMinMax));
-
     RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
     // The value is returned directly from the libcall.
     if (E->isCmpXChg())
       return Res;
 
-    // The value is returned directly for optimized libcalls but the expr
-    // provided an out-param.
-    if (UseOptimizedLibcall && Res.getScalarVal()) {
-      llvm::Value *ResVal = Res.getScalarVal();
-      if (PostOpMinMax) {
-        llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
-        ResVal = EmitPostAtomicMinMax(Builder, E->getOp(),
-                                      E->getValueType()->isSignedIntegerType(),
-                                      ResVal, LoadVal1);
-      } else if (PostOp) {
-        llvm::Value *LoadVal1 = Args[1].getRValue(*this).getScalarVal();
-        ResVal = Builder.CreateBinOp(PostOp, ResVal, LoadVal1);
-      }
-      if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
-        ResVal = Builder.CreateNot(ResVal);
-
-      Builder.CreateStore(ResVal, Dest.withElementType(ResVal->getType()));
-    }
-
     if (RValTy->isVoidType())
       return RValue::get(nullptr);
 
diff --git a/clang/test/CodeGen/LoongArch/atomics.c b/clang/test/CodeGen/LoongArch/atomics.c
index edc58d30db186d7..ece32ccdce36b09 100644
--- a/clang/test/CodeGen/LoongArch/atomics.c
+++ b/clang/test/CodeGen/LoongArch/atomics.c
@@ -33,9 +33,9 @@ void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
 }
 
 void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
-  // LA32: call i64 @__atomic_load_8
-  // LA32: call void @__atomic_store_8
-  // LA32: call i64 @__atomic_fetch_add_8
+  // LA32: call void @__atomic_load(i32 noundef 8
+  // LA32: call void @__atomic_store(i32 noundef 8
+  // LA32: atomicrmw add ptr %a, i64 %b seq_cst
   // LA64: load atomic i64, ptr %a seq_cst, align 8
   // LA64: store atomic i64 %b, ptr %a seq_cst, align 8
   // LA64: atomicrmw add ptr %a, i64 %b seq_cst
diff --git a/clang/test/CodeGen/PowerPC/quadword-atomics.c b/clang/test/CodeGen/PowerPC/quadword-atomics.c
index bff03b25d27ee9e..50762c13c106c84 100644
--- a/clang/test/CodeGen/PowerPC/quadword-atomics.c
+++ b/clang/test/CodeGen/PowerPC/quadword-atomics.c
@@ -45,7 +45,7 @@ void test_store(Q val, AtomicQ *ptr) {
 // PPC64-QUADWORD-ATOMICS:    [[TMP3:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
 //
 // PPC64-LABEL: @test_add(
-// PPC64:    [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(ptr noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
+// PPC64:    [[ATOMICRMW:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
 //
 void test_add(_Atomic(int128_t) *ptr, int128_t x) {
   // expected-no-diagnostics
diff --git a/clang/test/CodeGen/RISCV/riscv-atomics.c b/clang/test/CodeGen/RISCV/riscv-atomics.c
index f629ad7d72ea821..6fe571643775eaa 100644
--- a/clang/test/CodeGen/RISCV/riscv-atomics.c
+++ b/clang/test/CodeGen/RISCV/riscv-atomics.c
@@ -14,15 +14,15 @@
 #include <stdint.h>
 
 void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
-  // RV32I:  call zeroext i8 @__atomic_load_1
-  // RV32I:  call void @__atomic_store_1
-  // RV32I:  call zeroext i8 @__atomic_fetch_add_1
+  // RV32I:  call void @__atomic_load(i32 noundef 1
+  // RV32I:  call void @__atomic_store(i32 noundef 1
+  // RV32I:  atomicrmw add ptr %a, i8 %b seq_cst, align 1
   // RV32IA: load atomic i8, ptr %a seq_cst, align 1
   // RV32IA: store atomic i8 %b, ptr %a seq_cst, align 1
   // RV32IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
-  // RV64I:  call zeroext i8 @__atomic_load_1
-  // RV64I:  call void @__atomic_store_1
-  // RV64I:  call zeroext i8 @__atomic_fetch_add_1
+  // RV64I:  call void @__atomic_load(i64 noundef 1
+  // RV64I:  call void @__atomic_store(i64 noundef 1
+  // RV64I:  atomicrmw add ptr %a, i8 %b seq_cst, align 1
   // RV64IA: load atomic i8, ptr %a seq_cst, align 1
   // RV64IA: store atomic i8 %b, ptr %a seq_cst, align 1
   // RV64IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
@@ -32,15 +32,15 @@ void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
 }
 
 void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
-  // RV32I:  call i32 @__atomic_load_4
-  // RV32I:  call void @__atomic_store_4
-  // RV32I:  call i32 @__atomic_fetch_add_4
+  // RV32I:  call void @__atomic_load(i32 noundef 4
+  // RV32I:  call void @__atomic_store(i32 noundef 4
+  // RV32I:  atomicrmw add ptr %a, i32 %b seq_cst, align 4
   // RV32IA: load atomic i32, ptr %a seq_cst, align 4
   // RV32IA: store atomic i32 %b, ptr %a seq_cst, align 4
   // RV32IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
-  // RV64I:  call signext i32 @__atomic_load_4
-  // RV64I:  call void @__atomic_store_4
-  // RV64I:  call signext i32 @__atomic_fetch_add_4
+  // RV64I:  call void @__atomic_load(i64 noundef 4
+  // RV64I:  call void @__atomic_store(i64 noundef 4
+  // RV64I:  atomicrmw add ptr %a, i32 %b seq_cst, align 4
   // RV64IA: load atomic i32, ptr %a seq_cst, align 4
   // RV64IA: store atomic i32 %b, ptr %a seq_cst, align 4
   // RV64IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
@@ -50,15 +50,15 @@ void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
 }
 
 void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
-  // RV32I:  call i64 @__atomic_load_8
-  // RV32I:  call void @__atomic_store_8
-  // RV32I:  call i64 @__atomic_fetch_add_8
-  // RV32IA: call i64 @__atomic_load_8
-  // RV32IA: call void @__atomic_store_8
-  // RV32IA: call i64 @__atomic_fetch_add_8
-  // RV64I:  call i64 @__atomic_load_8
-  // RV64I:  call void @__atomic_store_8
-  // RV64I:  call i64 @__atomic_fetch_add_8
+  // RV32I:  call void @__atomic_load(i32 noundef 8
+  // RV32I:  call void @__atomic_store(i32 noundef 8
+  // RV32I:  atomicrmw add ptr %a, i64 %b seq_cst, align 8
+  // RV32IA: call void @__atomic_load(i32 noundef 8
+  // RV32IA: call void @__atomic_store(i32 noundef 8
+  // RV32IA: atomicrmw add ptr %a, i64 %b seq_cst, align 8
+  // RV64I:  call void @__atomic_load(i64 noundef 8
+  // RV64I:  call void @__atomic_store(i64 noundef 8
+  // RV64I:  atomicrmw add ptr %a, i64 %b seq_cst, align 8
   // RV64IA: load atomic i64, ptr %a seq_cst, align 8
   // RV64IA: store atomic i64 %b, ptr %a seq_cst, align 8
   // RV64IA: atomicrmw add ptr %a, i64 %b seq_cst, align 8
diff --git a/clang/test/CodeGen/arm-atomics-m.c b/clang/test/CodeGen/arm-atomics-m.c
index b9cc72bc6b98aba..952fe3e21f84734 100644
--- a/clang/test/CodeGen/arm-atomics-m.c
+++ b/clang/test/CodeGen/arm-atomics-m.c
@@ -22,14 +22,14 @@ void test_presence(void)
   r = 0;
   __atomic_store(&i, &r, memory_order_seq_cst);
 
-  // CHECK: __atomic_fetch_add_8
+  // CHECK: atomicrmw add ptr {{.*}} seq_cst, align 8
   __atomic_fetch_add(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_fetch_sub_8
+  // CHECK: atomicrmw sub ptr {{.*}} seq_cst, align 8
   __atomic_fetch_sub(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_load_8
+  // CHECK: __atomic_load(i32 noundef 8, ptr noundef @l, ptr noundef %rl, i32 noundef 5)
   long long rl;
   __atomic_load(&l, &rl, memory_order_seq_cst);
-  // CHECK: __atomic_store_8
+  // CHECK: __atomic_store(i32 noundef 8, ptr noundef @l, ptr noundef %rl, i32 noundef 5)
   rl = 0;
   __atomic_store(&l, &rl, memory_order_seq_cst);
 }
diff --git a/clang/test/CodeGen/arm-atomics-m0.c b/clang/test/CodeGen/arm-atomics-m0.c
index 335a1d2711f8087..3917e459e2671af 100644
--- a/clang/test/CodeGen/arm-atomics-m0.c
+++ b/clang/test/CodeGen/arm-atomics-m0.c
@@ -11,25 +11,25 @@ typedef enum memory_order {
 void test_presence(void)
 {
   // CHECK-LABEL: @test_presence
-  // CHECK: __atomic_fetch_add_4
+  // CHECK: atomicrmw add ptr {{.*}} seq_cst, align 4
   __atomic_fetch_add(&i, 1, memory_order_seq_cst);
-  // CHECK: __atomic_fetch_sub_4
+  // CHECK: atomicrmw sub {{.*}} seq_cst, align 4
   __atomic_fetch_sub(&i, 1, memory_order_seq_cst);
-  // CHECK: __atomic_load_4
+  // CHECK: __atomic_load(i32 noundef 4, ptr noundef @i, ptr noundef %r, i32 noundef 5)
   int r;
   __atomic_load(&i, &r, memory_order_seq_cst);
-  // CHECK: __atomic_store_4
+  // CHECK: __atomic_store(i32 noundef 4, ptr noundef @i, ptr noundef %r, i32 noundef 5)
   r = 0;
   __atomic_store(&i, &r, memory_order_seq_cst);
 
-  // CHECK: __atomic_fetch_add_8
+  // CHECK: atomicrmw add {{.*}} seq_cst, align 8
   __atomic_fetch_add(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_fetch_sub_8
+  // CHECK: atomicrmw sub {{.*}} seq_cst, align 8
   __atomic_fetch_sub(&l, 1, memory_order_seq_cst);
-  // CHECK: __atomic_load_8
+  // CHECK: __atomic_load(i32 noundef 8, ptr noundef @l, ptr noundef %rl, i32 noundef 5)
   long long rl;
   __atomic_load(&l, &rl, memory_order_seq_cst);
-  // CHECK: __atomic_store_8
+  // CHECK: __atomic_store(i32 noundef 8, ptr noundef @l, ptr noundef %rl, i32 noundef 5)
   rl = 0;
   __atomic_store(&l, &rl, memory_order_seq_cst);
 }
diff --git a/clang/test/CodeGen/atomic-ops-libcall.c b/clang/test/CodeGen/atomic-ops-libcall.c
index 745ccd22bf33f0a..2ec456a1a7a606d 100644
--- a/clang/test/CodeGen/atomic-ops-libcall.c
+++ b/clang/test/CodeGen/atomic-ops-libcall.c
@@ -1,7 +1,4 @@
-// RUN: %clang_cc1 < %s -triple armv5e-none-linux-gnueabi -emit-llvm -O1 | FileCheck %s
-
-// FIXME: This file should not be checking -O1 output.
-// Ie, it is testing many IR optimizer passes as part of front-end verification.
+// RUN: %clang_cc1 < %s -triple armv5e-none-linux-gnueabi -emit-llvm | FileCheck %s
 
 enum memory_order {
   memory_order_relaxed, memory_order_consume, memory_order_acquire,
@@ -9,112 +6,142 @@ enum memory_order {
 };
 
 int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) {
-  // CHECK: test_c11_atomic_fetch_add_int_ptr
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 12, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_add_int_ptr
+  // CHECK: store i32 12, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
 }
 
 int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) {
-  // CHECK: test_c11_atomic_fetch_sub_int_ptr
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 20, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_sub_int_ptr
+  // CHECK: store i32 20, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
 }
 
 int test_c11_atomic_fetch_add_int(_Atomic(int) *p) {
-  // CHECK: test_c11_atomic_fetch_add_int
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 3, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_add_int
+  // CHECK: store i32 3, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
 }
 
 int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) {
-  // CHECK: test_c11_atomic_fetch_sub_int
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 5, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_c11_atomic_fetch_sub_int
+  // CHECK: store i32 5, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
 }
 
 int *fp2a(int **p) {
-  // CHECK: @fp2a
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 4, i32 noundef 0)
+  // CHECK-LABEL: define{{.*}} @fp2a
+  // CHECK: store i32 4, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] monotonic, align 4
   // Note, the GNU builtins do not multiply by sizeof(T)!
   return __atomic_fetch_sub(p, 4, memory_order_relaxed);
 }
 
 int test_atomic_fetch_add(int *p) {
-  // CHECK: test_atomic_fetch_add
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_add
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_add(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_sub(int *p) {
-  // CHECK: test_atomic_fetch_sub
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_sub
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_sub(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_and(int *p) {
-  // CHECK: test_atomic_fetch_and
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_and_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_and
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw and ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_and(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_or(int *p) {
-  // CHECK: test_atomic_fetch_or
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_or_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_or
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw or ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_or(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_xor(int *p) {
-  // CHECK: test_atomic_fetch_xor
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_xor_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_xor
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw xor ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_xor(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_fetch_nand(int *p) {
-  // CHECK: test_atomic_fetch_nand
-  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_nand_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
+  // CHECK-LABEL: define{{.*}} @test_atomic_fetch_nand
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: {{%[^ ]*}} = atomicrmw nand ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
   return __atomic_fetch_nand(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_add_fetch(int *p) {
-  // CHECK: test_atomic_add_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_add_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_add_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw add ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], [[TMP1]]
   return __atomic_add_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_sub_fetch(int *p) {
-  // CHECK: test_atomic_sub_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_sub_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = add i32 [[CALL]], -55
+  // CHECK-LABEL: define{{.*}} @test_atomic_sub_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw sub ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = sub i32 [[CALL]], [[TMP1]]
   return __atomic_sub_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_and_fetch(int *p) {
-  // CHECK: test_atomic_and_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_and_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = and i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_and_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw and ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = and i32 [[CALL]], [[TMP1]]
   return __atomic_and_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_or_fetch(int *p) {
-  // CHECK: test_atomic_or_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_or_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = or i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_or_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw or ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = or i32 [[CALL]], [[TMP1]]
   return __atomic_or_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_xor_fetch(int *p) {
-  // CHECK: test_atomic_xor_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_xor_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], 55
+  // CHECK-LABEL: define{{.*}} @test_atomic_xor_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw xor ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: {{%[^ ]*}} = xor i32 [[CALL]], [[TMP1]]
   return __atomic_xor_fetch(p, 55, memory_order_seq_cst);
 }
 
 int test_atomic_nand_fetch(int *p) {
-  // CHECK: test_atomic_nand_fetch
-  // CHECK: [[CALL:%[^ ]*]] = tail call i32 @__atomic_fetch_nand_4(ptr noundef %p, i32 noundef 55, i32 noundef 5)
-  // FIXME: We should not be checking optimized IR. It changes independently of clang.
-  // FIXME-CHECK: [[AND:%[^ ]*]] = and i32 [[CALL]], 55
-  // FIXME-CHECK: {{%[^ ]*}} = xor i32 [[AND]], -1
+  // CHECK-LABEL: define{{.*}} @test_atomic_nand_fetch
+  // CHECK: store i32 55, ptr [[ATOMICTMP:%[^ ]*]], align 4
+  // CHECK: [[TMP1:%[^ ]*]] = load i32, ptr [[ATOMICTMP]], align 4
+  // CHECK: [[CALL:%[^ ]*]] = atomicrmw nand ptr [[TMP0:%.*]], i32 [[TMP1]] seq_cst, align 4
+  // CHECK: [[TMP2:%[^ ]*]] = and i32 [[CALL]], [[TMP1]]
+  // CHECK: {{%[^ ]*}} = xor i32 [[TMP2]], -1
   return __atomic_nand_fetch(p, 55, memory_order_seq_cst);
 }
diff --git a/clang/test/CodeGen/atomic-ops.c b/clang/test/CodeGen/atomic-ops.c
index 1295786524a0d3f..9a3ef87d2e2ecf9 100644
--- a/clang/test/CodeGen/atomic-ops.c
+++ b/clang/test/CodeGen/atomic-ops.c
@@ -198,7 +198,8 @@ struct S implicit_load(_Atomic(struct S) *a) {
 struct S fd1(struct S *a) {
   // CHECK-LABEL: @fd1
   // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
-  // CHECK: call void @__atomic_load(i32 noundef 8, ptr noundef {{.*}}, ptr noundef [[RETVAL]], i32 noundef 5)
+  // CHECK: [[TMP1:%.*]] = load atomic i64, ptr {{%.*}} seq_cst, align 4
+  // CHECK-NEXT: store i64 [[TMP1]], ptr [[RETVAL]], align 4
   // CHECK: ret
   struct S ret;
   __atomic_load(a, &ret, memory_order_seq_cst);
@@ -213,7 +214,8 @@ void fd2(struct S *a, struct S *b) {
   // CHECK-NEXT: store ptr %b, ptr [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-  // CHECK-NEXT: call void @__atomic_store(i32 noundef 8, ptr noundef [[LOAD_A_PTR]], ptr noundef [[LOAD_B_PTR]],
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+  // CHECK-NEXT: store atomic i64 [[LOAD_B]], ptr [[LOAD_A_PTR]] seq_cst, align 4
   // CHECK-NEXT: ret void
   __atomic_store(a, b, memory_order_seq_cst);
 }
@@ -229,7 +231,9 @@ void fd3(struct S *a, struct S *b, struct S *c) {
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-  // CHECK-NEXT: call void @__atomic_exchange(i32 noundef 8, ptr noundef [[LOAD_A_PTR]], ptr noundef [[LOAD_B_PTR]], ptr noundef [[LOAD_C_PTR]],
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+  // CHECK-NEXT: [[RESULT:%.*]] = atomicrmw xchg ptr [[LOAD_A_PTR]], i64 [[LOAD_B]] seq_cst, align 8
+  // CHECK-NEXT: store i64 [[RESULT]], ptr [[LOAD_C_PTR]], align 4
 
   __atomic_exchange(a, b, c, memory_order_seq_cst);
 }
@@ -245,8 +249,9 @@ _Bool fd4(struct S *a, struct S *b, struct S *c) {
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load ptr, ptr [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load ptr, ptr [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-  // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 8, ptr noundef [[LOAD_A_PTR]], ptr noundef [[LOAD_B_PTR]], ptr noundef [[LOAD_C_PTR]],
-  // CHECK-NEXT: ret i1 [[CALL]]
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, ptr [[LOAD_B_PTR]], align 4
+  // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, ptr [[LOAD_C_PTR]], align 4
+  // CHECK-NEXT: {{.*}} = cmpxchg weak ptr [[LOAD_A_PTR]], i64 [[LOAD_B]], i64 [[LOAD_C]] seq_cst seq_cst, align 8
   return __atomic_compare_exchange(a, b, c, 1, 5, 5);
 }
 
@@ -682,13 +687,13 @@ void test_underaligned(void) {
   // CHECK-LABEL: @test_underaligned
   struct Underaligned { char c[8]; } underaligned_a, underaligned_b, underaligned_c;
 
-  // CHECK: call void @__atomic_load(i32 noundef 8,
+  // CHECK: load atomic i64, {{.*}}, align 1
   __atomic_load(&underaligned_a, &underaligned_b, memory_order_seq_cst);
-  // CHECK: call void @__atomic_store(i32 noundef 8,
+  // CHECK: store atomic i64 {{.*}}, align 1
   __atomic_store(&underaligned_a, &underaligned_b, memory_order_seq_cst);
-  // CHECK: call void @__atomic_exchange(i32 noundef 8,
+  // CHECK: atomicrmw xchg ptr {{.*}}, align 8
   __atomic_exchange(&underaligned_a, &underaligned_b, &underaligned_c, memory_order_seq_cst);
-  // CHECK: call {{.*}} @__atomic_compare_exchange(i32 noundef 8,
+  // CHECK: cmpxchg weak ptr {{.*}}, align 8
   __atomic_compare_exchange(&underaligned_a, &underaligned_b, &underaligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
 
   __attribute__((aligned)) struct Underaligned aligned_a, aligned_b, aligned_c;
@@ -772,7 +777,7 @@ void test_minmax_postop(int *si, unsigned *ui, unsigned short *us, signed char *
   // CHECK: store i8 [[NEW]], ptr
   *sc = __atomic_min_fetch(sc, 42, memory_order_release);
 
-  // CHECK: [[OLD:%.*]] = call i64 @__atomic_fetch_umin_8(ptr noundef {{%.*}}, i64 noundef [[RHS:%.*]],
+  // CHECK: [[OLD:%.*]] = atomicrmw umin ptr {{%.*}}, i64 [[RHS:%.*]] release, align 8
   // CHECK: [[TST:%.*]] = icmp ult i64 [[OLD]], [[RHS]]
   // CHECK: [[NEW:%.*]] = select i1 [[TST]], i64 [[OLD]], i64 [[RHS]]
   // CHECK: store i64 [[NEW]], ptr
diff --git a/clang/test/CodeGen/atomics-inlining.c b/clang/test/CodeGen/atomics-inlining.c
index 862c63076b2dc0a..d67f7f70442a0f9 100644
--- a/clang/test/CodeGen/atomics-inlining.c
+++ b/clang/test/CodeGen/atomics-inlining.c
@@ -38,14 +38,14 @@ void test1(void) {
   (void)__atomic_store(&a1, &a2, memory_order_seq_cst);
 
 // ARM-LABEL: define{{.*}} void @test1
-// ARM: = call{{.*}} zeroext i8 @__atomic_load_1(ptr noundef @c1
-// ARM: call{{.*}} void @__atomic_store_1(ptr noundef @c1, i8 noundef zeroext
-// ARM: = call{{.*}} zeroext i16 @__atomic_load_2(ptr noundef @s1
-// ARM: call{{.*}} void @__atomic_store_2(ptr noundef @s1, i16 noundef zeroext
-// ARM: = call{{.*}} i32 @__atomic_load_4(ptr noundef @i1
-// ARM: call{{.*}} void @__atomic_store_4(ptr noundef @i1, i32 noundef
-// ARM: = call{{.*}} i64 @__atomic_load_8(ptr noundef @ll1
-// ARM: call{{.*}} void @__atomic_store_8(ptr noundef @ll1, i64 noundef
+// ARM: call{{.*}} void @__atomic_load(i32 noundef 1, ptr noundef @c1, ptr noundef @c2
+// ARM: call{{.*}} void @__atomic_store(i32 noundef 1, ptr noundef @c1, ptr noundef @c2
+// ARM: call{{.*}} void @__atomic_load(i32 noundef 2, ptr noundef @s1, ptr noundef @s2
+// ARM: call{{.*}} void @__atomic_store(i32 noundef 2, ptr noundef @s1, ptr noundef @s2
+// ARM: call{{.*}} void @__atomic_load(i32 noundef 4, ptr noundef @i1, ptr noundef @i2
+// ARM: call{{.*}} void @__atomic_store(i32 noundef 4, ptr noundef @i1, ptr noundef @i2
+// ARM: call{{.*}} void @__atomic_load(i32 noundef 8, ptr noundef @ll1, ptr noundef @ll2
+// ARM: call{{.*}} void @__atomic_store(i32 noundef 8, ptr noundef @ll1, ptr noundef @ll2
 // ARM: call{{.*}} void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // ARM: call{{.*}} void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 
@@ -56,8 +56,8 @@ void test1(void) {
 // PPC32: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
 // PPC32: = load atomic i32, ptr @i1 seq_cst, align 4
 // PPC32: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
-// PPC32: = call i64 @__atomic_load_8(ptr noundef @ll1
-// PPC32: call void @__atomic_store_8(ptr noundef @ll1, i64
+// PPC32: call void @__atomic_load(i32 noundef 8, ptr noundef @ll1, ptr noundef @ll2
+// PPC32: call void @__atomic_store(i32 noundef 8, ptr noundef @ll1, ptr noundef @ll2
 // PPC32: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // PPC32: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 
@@ -80,8 +80,8 @@ void test1(void) {
 // MIPS32: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
 // MIPS32: = load atomic i32, ptr @i1 seq_cst, align 4
 // MIPS32: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
-// MIPS32: call i64 @__atomic_load_8(ptr noundef @ll1
-// MIPS32: call void @__atomic_store_8(ptr noundef @ll1, i64
+// MIPS32: call void @__atomic_load(i32 noundef signext 8, ptr noundef @ll1, ptr noundef @ll2
+// MIPS32: call void @__atomic_store(i32 noundef signext 8, ptr noundef @ll1, ptr noundef @ll2
 // MIPS32: call void @__atomic_load(i32 noundef signext 100, ptr noundef @a1, ptr noundef @a2
 // MIPS32: call void @__atomic_store(i32 noundef signext 100, ptr noundef @a1, ptr noundef @a2
 
@@ -104,8 +104,8 @@ void test1(void) {
 // SPARC: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
 // SPARC: = load atomic i32, ptr @i1 seq_cst, align 4
 // SPARC: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
-// SPARCV8: call i64 @__atomic_load_8(ptr noundef @ll1
-// SPARCV8: call void @__atomic_store_8(ptr noundef @ll1, i64
+// SPARCV8: call void @__atomic_load(i32 noundef 8, ptr noundef @ll1, ptr noundef @ll2
+// SPARCV8: call void @__atomic_store(i32 noundef 8, ptr noundef @ll1, ptr noundef @ll2
 // SPARCV9: load atomic i64, ptr @ll1 seq_cst, align 8
 // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
diff --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
index 773ed41991f7837..87c5c781b206fb6 100644
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@@ -343,10 +343,8 @@ PS test_promoted_load(_Atomic(PS) *addr) {
   // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
   // CHECK:   store ptr %addr, ptr [[ADDR_ARG]], align 4
   // CHECK:   [[ADDR:%.*]] = load ptr, ptr [[ADDR_ARG]], align 4
-  // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_load_8(ptr noundef [[ADDR]], i32 noundef 5)
-  // CHECK:   store i64 [[RES]], ptr [[ATOMIC_RES]], align 8
+  // CHECK:   call arm_aapcscc void @__atomic_load(i32 noundef 8, ptr noundef [[ADDR]], ptr noundef [[ATOMIC_RES:%.*]], i32 noundef 5)
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
-
   return __c11_atomic_load(addr, 5);
 }
 
@@ -362,8 +360,7 @@ void test_promoted_store(_Atomic(PS) *addr, PS *val) {
   // CHECK:   [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
-  // CHECK:   [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 2
-  // CHECK:   call arm_aapcscc void @__atomic_store_8(ptr noundef [[ADDR]], i64 noundef [[VAL64]], i32 noundef 5)
+  // CHECK:   call arm_aapcscc void @__atomic_store(i32 noundef 8, ptr noundef [[ADDR]], ptr noundef [[ATOMIC_VAL]], i32 noundef 5)
   __c11_atomic_store(addr, *val, 5);
 }
 
@@ -380,9 +377,7 @@ PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
   // CHECK:   [[VAL:%.*]] = load ptr, ptr [[VAL_ARG]], align 4
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[VAL]], i32 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_VAL]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
-  // CHECK:   [[VAL64:%.*]] = load i64, ptr [[ATOMIC_VAL]], align 2
-  // CHECK:   [[RES:%.*]] = call arm_aapcscc i64 @__atomic_exchange_8(ptr noundef [[ADDR]], i64 noundef [[VAL64]], i32 noundef 5)
-  // CHECK:   store i64 [[RES]], ptr [[ATOMIC_RES]], align 8
+  // CHECK:   call arm_aapcscc void @__atomic_exchange(i32 noundef 8, ptr noundef [[ADDR]], ptr noundef [[ATOMIC_VAL]], ptr noundef [[ATOMIC_RES:%.*]], i32 noundef 5)
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 %agg.result, ptr align 8 [[ATOMIC_RES]], i32 6, i1 false)
   return __c11_atomic_exchange(addr, *val, 5);
 }
@@ -404,8 +399,7 @@ _Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
   // CHECK:   call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[NONATOMIC_TMP]], ptr align 2 [[NEW]], i32 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_DESIRED]], ptr align 2 [[DESIRED]], i64 6, i1 false)
   // CHECK:   call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_NEW]], ptr align 2 [[NONATOMIC_TMP]], i64 6, i1 false)
-  // CHECK:   [[NEW64:%.*]] = load i64, ptr [[ATOMIC_NEW]], align 2
-  // CHECK:   [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange_8(ptr noundef [[ADDR]], ptr noundef [[ATOMIC_DESIRED]], i64 noundef [[NEW64]], i32 noundef 5, i32 noundef 5)
+  // CHECK:   [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange(i32 noundef 8, ptr noundef [[ADDR]], ptr noundef [[ATOMIC_DESIRED]], ptr noundef [[ATOMIC_NEW]], i32 noundef 5, i32 noundef 5)
   // CHECK:   ret i1 [[RES]]
   return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
@@ -414,12 +408,12 @@ struct Empty {};
 
 struct Empty test_empty_struct_load(_Atomic(struct Empty)* empty) {
   // CHECK-LABEL: @test_empty_struct_load(
-  // CHECK: call arm_aapcscc zeroext i8 @__atomic_load_1(ptr noundef %{{.*}}, i32 noundef 5)
+  // CHECK: call arm_aapcscc void @__atomic_load(i32 noundef 1, ptr noundef {{%.*}}, ptr noundef {{%.*}} i32 noundef 5)
   return __c11_atomic_load(empty, 5);
 }
 
 void test_empty_struct_store(_Atomic(struct Empty)* empty, struct Empty value) {
   // CHECK-LABEL: @test_empty_struct_store(
-  // CHECK: call arm_aapcscc void @__atomic_store_1(ptr noundef %{{.*}}, i8 noundef zeroext %{{.*}}, i32 noundef 5)
+  // CHECK: call arm_aapcscc void @__atomic_store(i32 noundef 1, ptr noundef {{%.*}}, ptr noundef {{%.*}}, i32 noundef 5)
   __c11_atomic_store(empty, value, 5);
 }
diff --git a/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl b/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
index 2f020c210821242..89070b36a5bd7ed 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops-libcall.cl
@@ -20,63 +20,63 @@ typedef enum memory_scope {
 
 void f(atomic_int *i, global atomic_int *gi, local atomic_int *li, private atomic_int *pi, atomic_uint *ui, int cmp, int order, int scope) {
   int x;
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_load_4(ptr noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: call void @__opencl_atomic_load(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
+  // ARM: call void @__opencl_atomic_load(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
   x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: call void @__opencl_atomic_store(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
+  // ARM: call void @__opencl_atomic_store(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
   __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
 
   // SPIR: %[[GP:[0-9]+]] = addrspacecast ptr addrspace(1) {{%[0-9]+}} to ptr addrspace(4)
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef %[[GP]], i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: call void @__opencl_atomic_store(i64 noundef 4, ptr addrspace(4) noundef %[[GP]], ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
+  // ARM: call void @__opencl_atomic_store(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
   __opencl_atomic_store(gi, 1, memory_order_seq_cst, memory_scope_work_group);
 
   // SPIR: %[[GP:[0-9]+]] = addrspacecast ptr addrspace(3) {{%[0-9]+}} to ptr addrspace(4)
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef %[[GP]], i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: call void @__opencl_atomic_store(i64 noundef 4, ptr addrspace(4) noundef %[[GP]], ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
+  // ARM: call void @__opencl_atomic_store(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
   __opencl_atomic_store(li, 1, memory_order_seq_cst, memory_scope_work_group);
 
   // SPIR: %[[GP:[0-9]+]] = addrspacecast ptr {{%[0-9]+}} to ptr addrspace(4)
-  // SPIR: call void @__opencl_atomic_store_4(ptr addrspace(4) noundef %[[GP]], i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: call void @__opencl_atomic_store_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: call void @__opencl_atomic_store(i64 noundef 4, ptr addrspace(4) noundef %[[GP]], ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
+  // ARM: call void @__opencl_atomic_store(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 1)
   __opencl_atomic_store(pi, 1, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_add_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: {{%[^ ]*}} = atomicrmw add ptr addrspace(4) {{%[0-9]+}}, i32 {{%[0-9]+}} seq_cst, align 4
+  // ARM: {{%[^ ]*}} = atomicrmw add ptr {{%[0-9]+}}, i32 {{%[0-9]+}} seq_cst, align 4
   x = __opencl_atomic_fetch_add(i, 3, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_min_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: {{%[^ ]*}} = atomicrmw min ptr addrspace(4) {{%[0-9]+}}, i32 {{%[0-9]+}} seq_cst, align 4
+  // ARM: {{%[^ ]*}} = atomicrmw min ptr {{%[0-9]+}}, i32 {{%[0-9]+}} seq_cst, align 4
   x = __opencl_atomic_fetch_min(i, 3, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(ptr addrspace(4) noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call i32 @__opencl_atomic_fetch_umin_4(ptr noundef {{%[0-9]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 1)
+  // SPIR: {{%[^ ]*}} = atomicrmw umin ptr addrspace(4) {{%[0-9]+}}, i32 {{%[0-9]+}} seq_cst, align 4
+  // ARM: {{%[^ ]*}} = atomicrmw umin ptr {{%[0-9]+}}, i32 {{%[0-9]+}} seq_cst, align 4
   x = __opencl_atomic_fetch_min(ui, 3, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
   x = __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 1)
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_work_group);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 2)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 2)
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 2)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 2)
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_device);
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 3)
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 3)
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 3)
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, ptr noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 3)
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_all_svm_devices);
 
 #ifdef cl_khr_subgroups
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef 5, i32 noundef 5, i32 noundef 4)
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef 5, i32 noundef 5, i32 noundef 4)
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, memory_order_seq_cst, memory_order_seq_cst, memory_scope_sub_group);
 #endif
 
-  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
-  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange_4(ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, i32 noundef {{%[0-9]+}}, i32 noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
+  // SPIR: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i64 noundef 4, ptr addrspace(4) noundef {{%[0-9]+}}, ptr addrspace(4) noundef {{%[^,]+}}, ptr addrspace(4) noundef {{%.*}}, i32 noundef %{{.*}})
+  // ARM: {{%[^ ]*}} = call zeroext i1 @__opencl_atomic_compare_exchange(i32 noundef 4, ptr noundef {{%[0-9]+}}, ptr noundef {{%[^,]+}}, ptr noundef {{%.*}}, i32 noundef %{{.*}})
   x = __opencl_atomic_compare_exchange_weak(i, &cmp, 1, order, order, scope);
 }
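
A minimal sketch of the lowering these OpenCL checks expect, assuming a
hypothetical helper g and SPIR-style generic address space numbering (the
constant, SSA names, and exact operands are illustrative):

    // OpenCL C source (hypothetical):
    void g(atomic_int *p) {
      __opencl_atomic_fetch_add(p, 1, memory_order_seq_cst, memory_scope_work_group);
    }

    ; With this patch, the naturally aligned 4-byte RMW is expected to lower to
    ; atomic IR rather than a sized __opencl_atomic_fetch_add_4 libcall:
    ;   %old = atomicrmw add ptr addrspace(4) %p, i32 1 seq_cst, align 4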
