[clang] [CIR] Upstream __sync_<OP>_and_fetch builtins (PR #168347)

Hendrik Hübner via cfe-commits cfe-commits at lists.llvm.org
Thu Jan 15 16:45:41 PST 2026


https://github.com/HendrikHuebner updated https://github.com/llvm/llvm-project/pull/168347

>From 661817d4914aa54bf2d90748eb2aa448b1d967de Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Mon, 17 Nov 2025 11:42:01 +0100
Subject: [PATCH 01/10] [CIR] Upstream __sync_<OP>_and_fetch builtins

---
 clang/lib/CIR/CodeGen/Address.h         |  28 +-
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 268 +++++++++++++
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp    |  23 ++
 clang/lib/CIR/CodeGen/CIRGenFunction.h  |   5 +
 clang/test/CIR/CodeGen/atomic.c         | 491 ++++++++++++++++++++++++
 5 files changed, 813 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h
index c8ce530a7b0d3..02a24a86b3c84 100644
--- a/clang/lib/CIR/CodeGen/Address.h
+++ b/clang/lib/CIR/CodeGen/Address.h
@@ -45,8 +45,12 @@ class Address {
 public:
   Address(mlir::Value pointer, mlir::Type elementType,
           clang::CharUnits alignment)
-      : pointerAndKnownNonNull(pointer, false), elementType(elementType),
-        alignment(alignment) {
+      : Address(pointer, elementType, alignment, false) {}
+
+  Address(mlir::Value pointer, mlir::Type elementType,
+          clang::CharUnits alignment, bool pointerAndKnownNonNull)
+      : pointerAndKnownNonNull(pointer, pointerAndKnownNonNull),
+        elementType(elementType), alignment(alignment) {
     assert(pointer && "Pointer cannot be null");
     assert(elementType && "Element type cannot be null");
     assert(!alignment.isZero() && "Alignment cannot be zero");
@@ -77,6 +81,13 @@ class Address {
     return Address(newPtr, getElementType(), getAlignment());
   }
 
+  /// Return address with different alignment, but same pointer and element
+  /// type.
+  Address withAlignment(clang::CharUnits newAlignment) const {
+    return Address(getPointer(), getElementType(), newAlignment,
+                   isKnownNonNull());
+  }
+
   /// Return address with different element type, a bitcast pointer, and
   /// the same alignment.
   Address withElementType(CIRGenBuilderTy &builder, mlir::Type ElemTy) const;
@@ -133,6 +144,19 @@ class Address {
   template <typename OpTy> OpTy getDefiningOp() const {
     return mlir::dyn_cast_or_null<OpTy>(getDefiningOp());
   }
+
+  /// Whether the pointer is known not to be null.
+  bool isKnownNonNull() const {
+    assert(isValid() && "Invalid address");
+    return static_cast<bool>(pointerAndKnownNonNull.getInt());
+  }
+
+  /// Set the non-null bit.
+  Address setKnownNonNull() {
+    assert(isValid() && "Invalid address");
+    pointerAndKnownNonNull.setInt(true);
+    return *this;
+  }
 };
 
 } // namespace clang::CIRGen
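
For context on the new accessors: `pointerAndKnownNonNull` is an
`llvm::PointerIntPair` (as the `getInt`/`setInt` calls above suggest), so the
non-null flag is packed into the pointer's spare alignment bits rather than
stored as a separate member. A minimal standalone sketch of that packing,
using `int *` instead of the CIR types, purely for illustration:

    #include "llvm/ADT/PointerIntPair.h"
    #include <cassert>

    int main() {
      int x = 42;
      // One pointer plus a 1-bit flag, packed into a single word.
      llvm::PointerIntPair<int *, 1, bool> p(&x, /*IntVal=*/false);
      assert(!p.getInt());          // flag starts cleared, as in Address()
      p.setInt(true);               // what setKnownNonNull() does
      assert(p.getPointer() == &x); // the pointer itself is untouched
      assert(p.getInt());           // what isKnownNonNull() reads back
      return 0;
    }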
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 77f19343653db..a0a350ebe031c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -21,6 +21,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/GlobalDecl.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/Basic/DiagnosticFrontend.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -58,6 +59,107 @@ static RValue emitBuiltinBitOp(CIRGenFunction &cgf, const CallExpr *e,
   return RValue::get(result);
 }
 
+/// Emit the conversions required to turn the given value into an
+/// integer of the given size.
+static mlir::Value emitToInt(CIRGenFunction &cgf, mlir::Value v, QualType t,
+                             cir::IntType intType) {
+  v = cgf.emitToMemory(v, t);
+
+  if (isa<cir::PointerType>(v.getType()))
+    return cgf.getBuilder().createPtrToInt(v, intType);
+
+  assert(v.getType() == intType);
+  return v;
+}
+
+static mlir::Value emitFromInt(CIRGenFunction &cgf, mlir::Value v, QualType t,
+                               mlir::Type resultType) {
+  v = cgf.emitFromMemory(v, t);
+
+  if (isa<cir::PointerType>(resultType))
+    return cgf.getBuilder().createIntToPtr(v, resultType);
+
+  assert(v.getType() == resultType);
+  return v;
+}
+
+static Address checkAtomicAlignment(CIRGenFunction &cgf, const CallExpr *e) {
+  ASTContext &astContext = cgf.getContext();
+  Address ptr = cgf.emitPointerWithAlignment(e->getArg(0));
+  unsigned bytes =
+      isa<cir::PointerType>(ptr.getElementType())
+          ? astContext.getTypeSizeInChars(astContext.VoidPtrTy).getQuantity()
+          : cgf.cgm.getDataLayout().getTypeSizeInBits(ptr.getElementType()) / 8;
+  unsigned align = ptr.getAlignment().getQuantity();
+  if (align % bytes != 0) {
+    DiagnosticsEngine &diags = cgf.cgm.getDiags();
+    diags.Report(e->getBeginLoc(), diag::warn_sync_op_misaligned);
+    // Force address to be at least naturally-aligned.
+    return ptr.withAlignment(CharUnits::fromQuantity(bytes));
+  }
+  return ptr;
+}
+
+/// Utility to insert an atomic operation based on the atomic fetch kind
+/// and the expression node.
+static mlir::Value makeBinaryAtomicValue(
+    CIRGenFunction &cgf, cir::AtomicFetchKind kind, const CallExpr *expr,
+    mlir::Value *neededValP = nullptr,
+    cir::MemOrder ordering = cir::MemOrder::SequentiallyConsistent) {
+
+  QualType type = expr->getType();
+  QualType ptrType = expr->getArg(0)->getType();
+
+  assert(ptrType->isPointerType());
+  assert(
+      cgf.getContext().hasSameUnqualifiedType(type, ptrType->getPointeeType()));
+  assert(cgf.getContext().hasSameUnqualifiedType(type,
+                                                 expr->getArg(1)->getType()));
+
+  Address destAddr = checkAtomicAlignment(cgf, expr);
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+  cir::IntType intType =
+      ptrType->getPointeeType()->isUnsignedIntegerType()
+          ? builder.getUIntNTy(cgf.getContext().getTypeSize(type))
+          : builder.getSIntNTy(cgf.getContext().getTypeSize(type));
+  mlir::Value val = cgf.emitScalarExpr(expr->getArg(1));
+  mlir::Type valueType = val.getType();
+  val = emitToInt(cgf, val, type, intType);
+
+  // This output argument is needed for the <binop>_and_fetch builtins,
+  // which compute the result of the operation as their return value. The
+  // `AtomicFetch` operation only updates the memory location and returns
+  // the old value.
+  if (neededValP) {
+    *neededValP = val;
+  }
+
+  auto rmwi = cir::AtomicFetchOp::create(
+      builder, cgf.getLoc(expr->getSourceRange()), destAddr.emitRawPointer(),
+      val, kind, ordering, /*isVolatile=*/false, /*fetchFirst=*/true);
+  return emitFromInt(cgf, rmwi->getResult(0), type, valueType);
+}
+
+static RValue emitBinaryAtomicPost(CIRGenFunction &cgf,
+                                   cir::AtomicFetchKind atomicOpkind,
+                                   const CallExpr *e, cir::BinOpKind binopKind,
+                                   bool invert = false) {
+  mlir::Value val;
+  clang::QualType typ = e->getType();
+  mlir::Value result = makeBinaryAtomicValue(cgf, atomicOpkind, e, &val);
+  clang::CIRGen::CIRGenBuilderTy &builder = cgf.getBuilder();
+  result = cir::BinOp::create(builder, result.getLoc(), binopKind, result, val);
+
+  if (invert) {
+    result = cir::UnaryOp::create(builder, result.getLoc(),
+                                  cir::UnaryOpKind::Not, result);
+  }
+
+  result = emitFromInt(cgf, result, typ, val.getType());
+  return RValue::get(result);
+}
+
 RValue CIRGenFunction::emitRotate(const CallExpr *e, bool isRotateLeft) {
   mlir::Value input = emitScalarExpr(e->getArg(0));
   mlir::Value amount = emitScalarExpr(e->getArg(1));
@@ -520,6 +622,172 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     cir::PrefetchOp::create(builder, loc, address, locality, isWrite);
     return RValue::get(nullptr);
   }
+  case Builtin::BI__sync_fetch_and_add:
+  case Builtin::BI__sync_fetch_and_sub:
+  case Builtin::BI__sync_fetch_and_or:
+  case Builtin::BI__sync_fetch_and_and:
+  case Builtin::BI__sync_fetch_and_xor:
+  case Builtin::BI__sync_fetch_and_nand:
+  case Builtin::BI__sync_add_and_fetch:
+  case Builtin::BI__sync_sub_and_fetch:
+  case Builtin::BI__sync_and_and_fetch:
+  case Builtin::BI__sync_or_and_fetch:
+  case Builtin::BI__sync_xor_and_fetch:
+  case Builtin::BI__sync_nand_and_fetch:
+  case Builtin::BI__sync_val_compare_and_swap:
+  case Builtin::BI__sync_bool_compare_and_swap:
+  case Builtin::BI__sync_lock_test_and_set:
+  case Builtin::BI__sync_lock_release:
+  case Builtin::BI__sync_swap:
+    llvm_unreachable("Shouldn't make it through sema");
+
+  case Builtin::BI__sync_fetch_and_add_1:
+  case Builtin::BI__sync_fetch_and_add_2:
+  case Builtin::BI__sync_fetch_and_add_4:
+  case Builtin::BI__sync_fetch_and_add_8:
+  case Builtin::BI__sync_fetch_and_add_16:
+    llvm_unreachable("BI__sync_fetch_and_add NYI");
+  case Builtin::BI__sync_fetch_and_sub_1:
+  case Builtin::BI__sync_fetch_and_sub_2:
+  case Builtin::BI__sync_fetch_and_sub_4:
+  case Builtin::BI__sync_fetch_and_sub_8:
+  case Builtin::BI__sync_fetch_and_sub_16:
+    llvm_unreachable("BI__sync_fetch_and_sub NYI");
+
+  case Builtin::BI__sync_fetch_and_or_1:
+  case Builtin::BI__sync_fetch_and_or_2:
+  case Builtin::BI__sync_fetch_and_or_4:
+  case Builtin::BI__sync_fetch_and_or_8:
+  case Builtin::BI__sync_fetch_and_or_16:
+    llvm_unreachable("BI__sync_fetch_and_or NYI");
+  case Builtin::BI__sync_fetch_and_and_1:
+  case Builtin::BI__sync_fetch_and_and_2:
+  case Builtin::BI__sync_fetch_and_and_4:
+  case Builtin::BI__sync_fetch_and_and_8:
+  case Builtin::BI__sync_fetch_and_and_16:
+    llvm_unreachable("BI__sync_fetch_and_and NYI");
+  case Builtin::BI__sync_fetch_and_xor_1:
+  case Builtin::BI__sync_fetch_and_xor_2:
+  case Builtin::BI__sync_fetch_and_xor_4:
+  case Builtin::BI__sync_fetch_and_xor_8:
+  case Builtin::BI__sync_fetch_and_xor_16:
+    llvm_unreachable("BI__sync_fetch_and_xor NYI");
+  case Builtin::BI__sync_fetch_and_nand_1:
+  case Builtin::BI__sync_fetch_and_nand_2:
+  case Builtin::BI__sync_fetch_and_nand_4:
+  case Builtin::BI__sync_fetch_and_nand_8:
+  case Builtin::BI__sync_fetch_and_nand_16:
+    llvm_unreachable("BI__sync_fetch_and_nand NYI");
+
+  // Clang extensions: not overloaded yet.
+  case Builtin::BI__sync_fetch_and_min:
+    llvm_unreachable("BI__sync_fetch_and_min NYI");
+  case Builtin::BI__sync_fetch_and_max:
+    llvm_unreachable("BI__sync_fetch_and_max NYI");
+  case Builtin::BI__sync_fetch_and_umin:
+    llvm_unreachable("BI__sync_fetch_and_umin NYI");
+  case Builtin::BI__sync_fetch_and_umax:
+    llvm_unreachable("BI__sync_fetch_and_umax NYI");
+
+  case Builtin::BI__sync_add_and_fetch_1:
+  case Builtin::BI__sync_add_and_fetch_2:
+  case Builtin::BI__sync_add_and_fetch_4:
+  case Builtin::BI__sync_add_and_fetch_8:
+  case Builtin::BI__sync_add_and_fetch_16:
+    return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::Add, e,
+                                cir::BinOpKind::Add);
+
+  case Builtin::BI__sync_sub_and_fetch_1:
+  case Builtin::BI__sync_sub_and_fetch_2:
+  case Builtin::BI__sync_sub_and_fetch_4:
+  case Builtin::BI__sync_sub_and_fetch_8:
+  case Builtin::BI__sync_sub_and_fetch_16:
+    return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::Sub, e,
+                                cir::BinOpKind::Sub);
+
+  case Builtin::BI__sync_and_and_fetch_1:
+  case Builtin::BI__sync_and_and_fetch_2:
+  case Builtin::BI__sync_and_and_fetch_4:
+  case Builtin::BI__sync_and_and_fetch_8:
+  case Builtin::BI__sync_and_and_fetch_16:
+    return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::And, e,
+                                cir::BinOpKind::And);
+
+  case Builtin::BI__sync_or_and_fetch_1:
+  case Builtin::BI__sync_or_and_fetch_2:
+  case Builtin::BI__sync_or_and_fetch_4:
+  case Builtin::BI__sync_or_and_fetch_8:
+  case Builtin::BI__sync_or_and_fetch_16:
+    return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::Or, e,
+                                cir::BinOpKind::Or);
+
+  case Builtin::BI__sync_xor_and_fetch_1:
+  case Builtin::BI__sync_xor_and_fetch_2:
+  case Builtin::BI__sync_xor_and_fetch_4:
+  case Builtin::BI__sync_xor_and_fetch_8:
+  case Builtin::BI__sync_xor_and_fetch_16:
+    return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::Xor, e,
+                                cir::BinOpKind::Xor);
+
+  case Builtin::BI__sync_nand_and_fetch_1:
+  case Builtin::BI__sync_nand_and_fetch_2:
+  case Builtin::BI__sync_nand_and_fetch_4:
+  case Builtin::BI__sync_nand_and_fetch_8:
+  case Builtin::BI__sync_nand_and_fetch_16:
+    return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::Nand, e,
+                                cir::BinOpKind::And, true);
+
+  case Builtin::BI__sync_val_compare_and_swap_1:
+  case Builtin::BI__sync_val_compare_and_swap_2:
+  case Builtin::BI__sync_val_compare_and_swap_4:
+  case Builtin::BI__sync_val_compare_and_swap_8:
+  case Builtin::BI__sync_val_compare_and_swap_16:
+    llvm_unreachable("BI__sync_val_compare_and_swap NYI");
+  case Builtin::BI__sync_bool_compare_and_swap_1:
+  case Builtin::BI__sync_bool_compare_and_swap_2:
+  case Builtin::BI__sync_bool_compare_and_swap_4:
+  case Builtin::BI__sync_bool_compare_and_swap_8:
+  case Builtin::BI__sync_bool_compare_and_swap_16:
+    llvm_unreachable("BI__sync_bool_compare_and_swap NYI");
+  case Builtin::BI__sync_swap_1:
+  case Builtin::BI__sync_swap_2:
+  case Builtin::BI__sync_swap_4:
+  case Builtin::BI__sync_swap_8:
+  case Builtin::BI__sync_swap_16:
+    llvm_unreachable("BI__sync_swap1 like NYI");
+  case Builtin::BI__sync_lock_test_and_set_1:
+  case Builtin::BI__sync_lock_test_and_set_2:
+  case Builtin::BI__sync_lock_test_and_set_4:
+  case Builtin::BI__sync_lock_test_and_set_8:
+  case Builtin::BI__sync_lock_test_and_set_16:
+    llvm_unreachable("BI__sync_lock_test_and_set_1 like NYI");
+  case Builtin::BI__sync_lock_release_1:
+  case Builtin::BI__sync_lock_release_2:
+  case Builtin::BI__sync_lock_release_4:
+  case Builtin::BI__sync_lock_release_8:
+  case Builtin::BI__sync_lock_release_16:
+    llvm_unreachable("BI__sync_lock_release_1 like NYI");
+  case Builtin::BI__sync_synchronize:
+    llvm_unreachable("BI__sync_synchronize NYI");
+  case Builtin::BI__builtin_nontemporal_load:
+    llvm_unreachable("BI__builtin_nontemporal_load NYI");
+  case Builtin::BI__builtin_nontemporal_store:
+    llvm_unreachable("BI__builtin_nontemporal_store NYI");
+  case Builtin::BI__c11_atomic_is_lock_free:
+    llvm_unreachable("BI__c11_atomic_is_lock_free NYI");
+  case Builtin::BI__atomic_is_lock_free:
+    llvm_unreachable("BI__atomic_is_lock_free NYI");
+  case Builtin::BI__atomic_test_and_set:
+    llvm_unreachable("BI__atomic_test_and_set NYI");
+  case Builtin::BI__atomic_clear:
+    llvm_unreachable("BI__atomic_clear NYI");
+  case Builtin::BI__atomic_thread_fence:
+    llvm_unreachable("BI__atomic_thread_fence NYI");
+  case Builtin::BI__atomic_signal_fence:
+    llvm_unreachable("BI__atomic_signal_fence NYI");
+  case Builtin::BI__c11_atomic_thread_fence:
+  case Builtin::BI__c11_atomic_signal_fence:
+    llvm_unreachable("BI__c11_atomic_thread_fence like NYI");
   }
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
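
The decomposition that `emitBinaryAtomicPost` performs is easy to sanity-check
from user code: the `<binop>_and_fetch` builtins return the new value, which
the lowering rebuilds from the fetch-first atomic result plus an ordinary
binop. A small self-checking example of that equivalence (single-threaded,
just to show the arithmetic; compiles with Clang or GCC):

    #include <cassert>

    int main() {
      int x = 5;
      int r1 = __sync_add_and_fetch(&x, 3);  // returns the new value: 8
      assert(r1 == 8 && x == 8);

      // The lowering's rebuild: fetch-first atomic, then redo the binop.
      int old = __sync_fetch_and_add(&x, 3); // returns the old value: 8
      int r2 = old + 3;                      // matches add_and_fetch's result
      assert(r2 == 11 && x == 11);
      return 0;
    }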
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 91a59d60fcb3e..ac633a012d592 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -553,6 +553,14 @@ mlir::Value CIRGenFunction::emitToMemory(mlir::Value value, QualType ty) {
   return value;
 }
 
+mlir::Value CIRGenFunction::emitFromMemory(mlir::Value value, QualType ty) {
+  if (!ty->isBooleanType() && hasBooleanRepresentation(ty)) {
+    llvm_unreachable("NIY");
+  }
+
+  return value;
+}
+
 void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue,
                                        bool isInit) {
   if (lvalue.getType()->isConstantMatrixType()) {
@@ -1921,6 +1929,21 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
   return callResult;
 }
 
+// TODO: This can also be abstracted into common AST helpers.
+bool CIRGenFunction::hasBooleanRepresentation(QualType type) {
+
+  if (type->isBooleanType())
+    return true;
+
+  if (const EnumType *enumType = type->getAs<EnumType>())
+    return enumType->getDecl()->getIntegerType()->isBooleanType();
+
+  if (const AtomicType *atomicType = type->getAs<AtomicType>())
+    return hasBooleanRepresentation(atomicType->getValueType());
+
+  return false;
+}
+
 CIRGenCallee CIRGenFunction::emitCallee(const clang::Expr *e) {
   e = e->IgnoreParens();
 
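For reference, the three shapes `hasBooleanRepresentation` accepts, written
out as declarations (C++11 syntax for the enum case; in C, `_Atomic(_Bool)`
is what would recurse through the `AtomicType` branch):

    bool plain;                   // isBooleanType()
    enum Flag : bool { No, Yes }; // enum whose underlying integer type is bool
    // In C: _Atomic(_Bool) ab;   // AtomicType wrapping a boolean value type
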
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 00f289bcd1bb2..be6facfd77e04 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1370,6 +1370,7 @@ class CIRGenFunction : public CIRGenTypeCache {
   RValue emitCallExpr(const clang::CallExpr *e,
                       ReturnValueSlot returnValue = ReturnValueSlot());
   LValue emitCallExprLValue(const clang::CallExpr *e);
+  bool hasBooleanRepresentation(QualType type);
   CIRGenCallee emitCallee(const clang::Expr *e);
 
   template <typename T>
@@ -1756,6 +1757,10 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// to conserve the high level information.
   mlir::Value emitToMemory(mlir::Value value, clang::QualType ty);
 
+  /// EmitFromMemory - Change a scalar value from its memory
+  /// representation to its value representation.
+  mlir::Value emitFromMemory(mlir::Value value, clang::QualType ty);
+
   /// Emit a trap instruction, which is used to abort the program in an abnormal
   /// way, usually for debugging purposes.
   /// \p createNewBlock indicates whether to create a new block for the IR
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index d5bea8446d730..fd814459d614b 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -1133,3 +1133,494 @@ int c11_atomic_fetch_nand(_Atomic(int) *ptr, int value) {
   // OGCG:      %[[RES:.+]] = atomicrmw nand ptr %{{.+}}, i32 %{{.+}} seq_cst, align 4
   // OGCG-NEXT: store i32 %[[RES]], ptr %{{.+}}, align 4
 }
+
+// CHECK-LABEL: @test_op_and_fetch
+// LLVM-LABEL: @test_op_and_fetch
+void test_op_and_fetch() {
+  signed char sc;
+  unsigned char uc;
+  signed short ss;
+  unsigned short us;
+  signed int si;
+  unsigned int ui;
+  signed long long sll;
+  unsigned long long ull;
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  sc = __sync_add_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(add, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = add i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  uc = __sync_add_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(add, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = add i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  ss = __sync_add_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(add, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = add i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  us = __sync_add_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(add, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = add i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  si = __sync_add_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(add, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = add i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  ui = __sync_add_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(add, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = add i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  sll = __sync_add_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(add, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = add i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  ull = __sync_add_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(sub, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = sub i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  sc = __sync_sub_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(sub, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = sub i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  uc = __sync_sub_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(sub, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = sub i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  ss = __sync_sub_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(sub, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = sub i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  us = __sync_sub_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(sub, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = sub i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  si = __sync_sub_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(sub, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = sub i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  ui = __sync_sub_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(sub, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = sub i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  sll = __sync_sub_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(sub, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = sub i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  ull = __sync_sub_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = and i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  sc = __sync_and_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = and i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  uc = __sync_and_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = and i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  ss = __sync_and_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = and i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  us = __sync_and_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = and i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  si = __sync_and_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = and i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  ui = __sync_and_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = and i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  sll = __sync_and_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = and i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  ull = __sync_and_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(or, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = or i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  sc = __sync_or_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(or, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = or i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  uc = __sync_or_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(or, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = or i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  ss = __sync_or_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(or, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = or i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  us = __sync_or_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(or, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = or i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  si = __sync_or_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(or, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = or i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  ui = __sync_or_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(or, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = or i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  sll = __sync_or_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(or, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = or i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  ull = __sync_or_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(xor, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = xor i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  sc = __sync_xor_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[RET1:%.*]] = cir.binop(xor, [[RES1]], [[VAL1]]) : !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[RET1:%.*]] = xor i8 [[RES1]], [[VAL1]]
+  // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  uc = __sync_xor_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[RET2:%.*]] = cir.binop(xor, [[RES2]], [[VAL2]]) : !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[RET2:%.*]] = xor i16 [[RES2]], [[CONV2]]
+  // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  ss = __sync_xor_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[RET3:%.*]] = cir.binop(xor, [[RES3]], [[VAL3]]) : !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[RET3:%.*]] = xor i16 [[RES3]], [[CONV3]]
+  // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  us = __sync_xor_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[RET4:%.*]] = cir.binop(xor, [[RES4]], [[VAL4]]) : !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[RET4:%.*]] = xor i32 [[RES4]], [[CONV4]]
+  // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  si = __sync_xor_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[RET5:%.*]] = cir.binop(xor, [[RES5]], [[VAL5]]) : !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[RET5:%.*]] = xor i32 [[RES5]], [[CONV5]]
+  // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  ui = __sync_xor_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[RET6:%.*]] = cir.binop(xor, [[RES6]], [[VAL6]]) : !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[RET6:%.*]] = xor i64 [[RES6]], [[CONV6]]
+  // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  sll = __sync_xor_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[RET7:%.*]] = cir.binop(xor, [[RES7]], [[VAL7]]) : !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[RET7:%.*]] = xor i64 [[RES7]], [[CONV7]]
+  // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  ull = __sync_xor_and_fetch(&ull, uc);
+
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[INTERM0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
+  // CHECK: [[RET0:%.*]] =  cir.unary(not, [[INTERM0]]) : !s8i, !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw nand ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[INTERM0:%.*]] = and i8 [[RES0]], [[VAL0]]
+  // LLVM:  [[RET0:%.*]] = xor i8 [[INTERM0]], -1
+  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  sc = __sync_nand_and_fetch(&sc, uc);
+
+  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
+  // CHECK: [[INTERM1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
+  // CHECK: [[RET1:%.*]] = cir.unary(not, [[INTERM1]]) : !u8i, !u8i
+  // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES1:%.*]] = atomicrmw nand ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // LLVM:  [[INTERM1:%.*]] = and i8 [[RES1]], [[VAL1]]
+  // LLVM:  [[RET1:%.*]] = xor i8 [[INTERM1]], -1
+  // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  uc = __sync_nand_and_fetch(&uc, uc);
+
+  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
+  // CHECK: [[INTERM2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
+  // CHECK: [[RET2:%.*]] =  cir.unary(not, [[INTERM2]]) : !s16i, !s16i
+  // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // LLVM:  [[RES2:%.*]] = atomicrmw nand ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // LLVM:  [[INTERM2:%.*]] = and i16 [[RES2]], [[CONV2]]
+  // LLVM:  [[RET2:%.*]] = xor i16 [[INTERM2]], -1
+  // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  ss = __sync_nand_and_fetch(&ss, uc);
+
+  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
+  // CHECK: [[INTERM3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
+  // CHECK: [[RET3:%.*]] =  cir.unary(not, [[INTERM3]]) : !u16i, !u16i
+  // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // LLVM:  [[RES3:%.*]] = atomicrmw nand ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // LLVM:  [[INTERM3:%.*]] = and i16 [[RES3]], [[CONV3]]
+  // LLVM:  [[RET3:%.*]] = xor i16 [[INTERM3]], -1
+  // LLVM:  store i16 [[RET3]], ptr %{{.*}}, align 2
+  us = __sync_nand_and_fetch(&us, uc);
+
+  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
+  // CHECK: [[INTERM4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
+  // CHECK: [[RET4:%.*]] =  cir.unary(not, [[INTERM4]]) : !s32i, !s32i
+  // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // LLVM:  [[RES4:%.*]] = atomicrmw nand ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // LLVM:  [[INTERM4:%.*]] = and i32 [[RES4]], [[CONV4]]
+  // LLVM:  [[RET4:%.*]] = xor i32 [[INTERM4]], -1
+  // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  si = __sync_nand_and_fetch(&si, uc);
+
+  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
+  // CHECK: [[INTERM5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
+  // CHECK: [[RET5:%.*]] =  cir.unary(not, [[INTERM5]]) : !u32i, !u32i
+  // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // LLVM:  [[RES5:%.*]] = atomicrmw nand ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // LLVM:  [[INTERM5:%.*]] = and i32 [[RES5]], [[CONV5]]
+  // LLVM:  [[RET5:%.*]] = xor i32 [[INTERM5]], -1
+  // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  ui = __sync_nand_and_fetch(&ui, uc);
+
+  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
+  // CHECK: [[INTERM6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
+  // CHECK: [[RET6:%.*]] =  cir.unary(not, [[INTERM6]]) : !s64i, !s64i
+  // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // LLVM:  [[RES6:%.*]] = atomicrmw nand ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // LLVM:  [[INTERM6:%.*]] = and i64 [[RES6]], [[CONV6]]
+  // LLVM:  [[RET6:%.*]] = xor i64 [[INTERM6]], -1
+  // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  sll = __sync_nand_and_fetch(&sll, uc);
+
+  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
+  // CHECK: [[INTERM7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
+  // CHECK: [[RET7:%.*]] =  cir.unary(not, [[INTERM7]]) : !u64i, !u64i
+  // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // LLVM:  [[RES7:%.*]] = atomicrmw nand ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // LLVM:  [[INTERM7:%.*]] = and i64 [[RES7]], [[CONV7]]
+  // LLVM:  [[RET7:%.*]] = xor i64 [[INTERM7]], -1
+  // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  ull = __sync_nand_and_fetch(&ull, uc);
+}

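The nand tests above are the one case where `emitBinaryAtomicPost` needs the
extra `cir.unary(not, ...)`: like GCC since 4.4, Clang defines
`__sync_nand_and_fetch` as storing and returning `~(old & val)`. A
self-checking sketch of that identity (single-threaded, for illustration):

    #include <cassert>

    int main() {
      unsigned char uc = 0xF0, val = 0x3C;
      unsigned char old = uc;
      unsigned char ret = __sync_nand_and_fetch(&uc, val);
      assert(ret == (unsigned char)~(old & val)); // binop(and) + unary(not)
      assert(uc == ret); // memory holds the nand result as well
      return 0;
    }
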
>From cfde1012801591fc9c58f0468f62158fef115608 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Wed, 26 Nov 2025 22:04:19 +0100
Subject: [PATCH 02/10] Address review feedback

---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp  |  35 ++--
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp     |  42 ++--
 clang/lib/CIR/CodeGen/CIRGenFunction.cpp |   5 +
 clang/lib/CIR/CodeGen/CIRGenFunction.h   |   4 +-
 clang/lib/CIR/CodeGen/CIRGenTypes.cpp    |  25 +++
 clang/lib/CIR/CodeGen/CIRGenTypes.h      |   2 +
 clang/test/CIR/CodeGen/atomic.c          | 237 ++++++++++++++++++++++-
 7 files changed, 304 insertions(+), 46 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index a0a350ebe031c..490b34ac29560 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -65,7 +65,7 @@ static mlir::Value emitToInt(CIRGenFunction &cgf, mlir::Value v, QualType t,
                              cir::IntType intType) {
   v = cgf.emitToMemory(v, t);
 
-  if (isa<cir::PointerType>(v.getType()))
+  if (mlir::isa<cir::PointerType>(v.getType()))
     return cgf.getBuilder().createPtrToInt(v, intType);
 
   assert(v.getType() == intType);
@@ -76,7 +76,7 @@ static mlir::Value emitFromInt(CIRGenFunction &cgf, mlir::Value v, QualType t,
                                mlir::Type resultType) {
   v = cgf.emitFromMemory(v, t);
 
-  if (isa<cir::PointerType>(resultType))
+  if (mlir::isa<cir::PointerType>(resultType))
     return cgf.getBuilder().createIntToPtr(v, resultType);
 
   assert(v.getType() == resultType);
@@ -87,9 +87,11 @@ static Address checkAtomicAlignment(CIRGenFunction &cgf, const CallExpr *e) {
   ASTContext &astContext = cgf.getContext();
   Address ptr = cgf.emitPointerWithAlignment(e->getArg(0));
   unsigned bytes =
-      isa<cir::PointerType>(ptr.getElementType())
+      mlir::isa<cir::PointerType>(ptr.getElementType())
           ? astContext.getTypeSizeInChars(astContext.VoidPtrTy).getQuantity()
-          : cgf.cgm.getDataLayout().getTypeSizeInBits(ptr.getElementType()) / 8;
+          : cgf.cgm.getDataLayout().getTypeSizeInBits(ptr.getElementType()) /
+                cgf.cgm.getASTContext().getCharWidth();
+
   unsigned align = ptr.getAlignment().getQuantity();
   if (align % bytes != 0) {
     DiagnosticsEngine &diags = cgf.cgm.getDiags();
@@ -130,9 +132,8 @@ static mlir::Value makeBinaryAtomicValue(
   // which compute the result of the operation as their return value. The
   // `AtomicFetch` operation only updates the memory location and returns
   // the old value.
-  if (neededValP) {
+  if (neededValP)
     *neededValP = val;
-  }
 
   auto rmwi = cir::AtomicFetchOp::create(
       builder, cgf.getLoc(expr->getSourceRange()), destAddr.emitRawPointer(),
@@ -151,10 +152,9 @@ static RValue emitBinaryAtomicPost(CIRGenFunction &cgf,
   clang::CIRGen::CIRGenBuilderTy &builder = cgf.getBuilder();
   result = cir::BinOp::create(builder, result.getLoc(), binopKind, result, val);
 
-  if (invert) {
+  if (invert)
     result = cir::UnaryOp::create(builder, result.getLoc(),
                                   cir::UnaryOpKind::Not, result);
-  }
 
   result = emitFromInt(cgf, result, typ, val.getType());
   return RValue::get(result);
@@ -639,56 +639,43 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__sync_lock_test_and_set:
   case Builtin::BI__sync_lock_release:
   case Builtin::BI__sync_swap:
-    llvm_unreachable("Shouldn't make it through sema");
-
   case Builtin::BI__sync_fetch_and_add_1:
   case Builtin::BI__sync_fetch_and_add_2:
   case Builtin::BI__sync_fetch_and_add_4:
   case Builtin::BI__sync_fetch_and_add_8:
   case Builtin::BI__sync_fetch_and_add_16:
-    llvm_unreachable("BI__sync_fetch_and_add NYI");
   case Builtin::BI__sync_fetch_and_sub_1:
   case Builtin::BI__sync_fetch_and_sub_2:
   case Builtin::BI__sync_fetch_and_sub_4:
   case Builtin::BI__sync_fetch_and_sub_8:
   case Builtin::BI__sync_fetch_and_sub_16:
-    llvm_unreachable("BI__sync_fetch_and_sub NYI");
-
   case Builtin::BI__sync_fetch_and_or_1:
   case Builtin::BI__sync_fetch_and_or_2:
   case Builtin::BI__sync_fetch_and_or_4:
   case Builtin::BI__sync_fetch_and_or_8:
   case Builtin::BI__sync_fetch_and_or_16:
-    llvm_unreachable("BI__sync_fetch_and_or NYI");
   case Builtin::BI__sync_fetch_and_and_1:
   case Builtin::BI__sync_fetch_and_and_2:
   case Builtin::BI__sync_fetch_and_and_4:
   case Builtin::BI__sync_fetch_and_and_8:
   case Builtin::BI__sync_fetch_and_and_16:
-    llvm_unreachable("BI__sync_fetch_and_and NYI");
   case Builtin::BI__sync_fetch_and_xor_1:
   case Builtin::BI__sync_fetch_and_xor_2:
   case Builtin::BI__sync_fetch_and_xor_4:
   case Builtin::BI__sync_fetch_and_xor_8:
   case Builtin::BI__sync_fetch_and_xor_16:
-    llvm_unreachable("BI__sync_fetch_and_xor NYI");
   case Builtin::BI__sync_fetch_and_nand_1:
   case Builtin::BI__sync_fetch_and_nand_2:
   case Builtin::BI__sync_fetch_and_nand_4:
   case Builtin::BI__sync_fetch_and_nand_8:
   case Builtin::BI__sync_fetch_and_nand_16:
-    llvm_unreachable("BI__sync_fetch_and_nand NYI");
-
-  // Clang extensions: not overloaded yet.
   case Builtin::BI__sync_fetch_and_min:
-    llvm_unreachable("BI__sync_fetch_and_min NYI");
   case Builtin::BI__sync_fetch_and_max:
-    llvm_unreachable("BI__sync_fetch_and_max NYI");
   case Builtin::BI__sync_fetch_and_umin:
-    llvm_unreachable("BI__sync_fetch_and_umin NYI");
   case Builtin::BI__sync_fetch_and_umax:
-    llvm_unreachable("BI__sync_fetch_and_umax NYI");
-
+    cgm.errorNYI(e->getSourceRange(), "__sync_fetch_and_* builtins NYI");
+    return getUndefRValue(e->getType());
   case Builtin::BI__sync_add_and_fetch_1:
   case Builtin::BI__sync_add_and_fetch_2:
   case Builtin::BI__sync_add_and_fetch_4:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index ac633a012d592..0390d77776c78 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -547,15 +547,32 @@ LValue CIRGenFunction::emitLValueForFieldInitialization(
 }
 
 mlir::Value CIRGenFunction::emitToMemory(mlir::Value value, QualType ty) {
-  // Bool has a different representation in memory than in registers,
-  // but in ClangIR, it is simply represented as a cir.bool value.
-  // This function is here as a placeholder for possible future changes.
+  if (auto *atomicTy = ty->getAs<AtomicType>())
+    ty = atomicTy->getValueType();
+
+  if (ty->isExtVectorBoolType()) {
+    cgm.errorNYI("emitToMemory: extVectorBoolType");
+  }
+
+  if (ty->hasBooleanRepresentation() || ty->isBitIntType()) {
+    mlir::Type storeType = convertTypeForLoadStore(ty, value.getType());
+    return builder.createIntCast(value, storeType);
+  }
+
   return value;
 }
 
 mlir::Value CIRGenFunction::emitFromMemory(mlir::Value value, QualType ty) {
-  if (!ty->isBooleanType() && hasBooleanRepresentation(ty)) {
-    llvm_unreachable("NIY");
+  if (auto *atomicTy = ty->getAs<AtomicType>())
+    ty = atomicTy->getValueType();
+
+  if (ty->isPackedVectorBoolType(getContext())) {
+    cgm.errorNYI("emitFromMemory: PackedVectorBoolType");
+  }
+
+  if (ty->hasBooleanRepresentation() || ty->isBitIntType() ||
+      ty->isExtVectorBoolType()) {
+    mlir::Type resTy = convertType(ty);
+    return builder.createIntCast(value, resTy);
   }
 
   return value;
@@ -1929,21 +1946,6 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
   return callResult;
 }
 
-// TODO: This can also be abstracted into common AST helpers.
-bool CIRGenFunction::hasBooleanRepresentation(QualType type) {
-
-  if (type->isBooleanType())
-    return true;
-
-  if (const EnumType *enumType = type->getAs<EnumType>())
-    return enumType->getDecl()->getIntegerType()->isBooleanType();
-
-  if (const AtomicType *atomicType = type->getAs<AtomicType>())
-    return hasBooleanRepresentation(atomicType->getValueType());
-
-  return false;
-}
-
 CIRGenCallee CIRGenFunction::emitCallee(const clang::Expr *e) {
   e = e->IgnoreParens();
 
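The `emitToMemory`/`emitFromMemory` pair exists because some scalar types are
wider in memory than in their value form; `bool` is the canonical case in
classic CodeGen (value `i1`, memory `i8`), and the rewritten helpers route
through `convertTypeForLoadStore` to get the same effect in CIR. A sketch of
where each conversion fires, assuming that i1/i8 convention:

    // bool: value representation i1, memory representation i8.
    void store_bool(bool *p, bool b) {
      *p = b;    // emitToMemory widens i1 -> i8 before the store
    }
    bool load_bool(bool *p) {
      return *p; // emitFromMemory narrows i8 -> i1 after the load
    }
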
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 866fda3166f41..28f9a01df7b2a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -32,6 +32,11 @@ CIRGenFunction::CIRGenFunction(CIRGenModule &cgm, CIRGenBuilderTy &builder,
 
 CIRGenFunction::~CIRGenFunction() {}
 
+mlir::Type CIRGenFunction::convertTypeForLoadStore(QualType astType,
+                                                   mlir::Type mlirType) {
+  return cgm.getTypes().convertTypeForLoadStore(astType, mlirType);
+}
+
 // This is copied from clang/lib/CodeGen/CodeGenFunction.cpp
 cir::TypeEvaluationKind CIRGenFunction::getEvaluationKind(QualType type) {
   type = type.getCanonicalType();
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index be6facfd77e04..8ac9c9b38ce24 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -202,6 +202,9 @@ class CIRGenFunction : public CIRGenTypeCache {
     return convertType(getContext().getTypeDeclType(t));
   }
 
+  mlir::Type convertTypeForLoadStore(QualType astType,
+                                                     mlir::Type mlirType);
+
   ///  Return the cir::TypeEvaluationKind of QualType \c type.
   static cir::TypeEvaluationKind getEvaluationKind(clang::QualType type);
 
@@ -1370,7 +1373,6 @@ class CIRGenFunction : public CIRGenTypeCache {
   RValue emitCallExpr(const clang::CallExpr *e,
                       ReturnValueSlot returnValue = ReturnValueSlot());
   LValue emitCallExprLValue(const clang::CallExpr *e);
-  bool hasBooleanRepresentation(QualType type);
   CIRGenCallee emitCallee(const clang::Expr *e);
 
   template <typename T>
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index efc2c6c0ba500..3f2b795f734fc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -2,6 +2,7 @@
 
 #include "CIRGenFunctionInfo.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/BuiltinTypes.h"
 
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/GlobalDecl.h"
@@ -116,6 +117,30 @@ std::string CIRGenTypes::getRecordTypeName(const clang::RecordDecl *recordDecl,
   return builder.getUniqueRecordName(std::string(typeName));
 }
 
+mlir::Type CIRGenTypes::convertTypeForLoadStore(QualType qualType,
+                                                 mlir::Type mlirType) {
+  if (!mlirType) {
+    convertType(qualType);
+
+    if (mlir::isa<mlir::IntegerType>(mlirType) &&
+        mlir::cast<mlir::IntegerType>(mlirType).getWidth() == 1)
+      return mlir::IntegerType::get(&getMLIRContext(),
+                                    astContext.getTypeSize(qualType));
+    
+     return mlirType;
+  }
+
+  if (qualType->isBitIntType())
+    return mlir::IntegerType::get(
+        &getMLIRContext(), astContext.getTypeSizeInChars(qualType).getQuantity() * astContext.getCharWidth());
+
+
+  if (qualType->isExtVectorBoolType())
+    return convertTypeForMem(qualType);
+
+  return mlirType;
+}
+
 /// Return true if the specified type is already completely laid out.
 bool CIRGenTypes::isRecordLayoutComplete(const Type *ty) const {
   const auto it = recordDeclTypes.find(ty);
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h
index e79cdfc9f8224..027b26f037924 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h
@@ -118,6 +118,8 @@ class CIRGenTypes {
   std::string getRecordTypeName(const clang::RecordDecl *,
                                 llvm::StringRef suffix);
 
+  mlir::Type convertTypeForLoadStore(QualType qualType,
+                                                 mlir::Type mlirType);
   const CIRGenRecordLayout &getCIRGenRecordLayout(const clang::RecordDecl *rd);
 
   /// Convert type T into an mlir::Type. This differs from convertType in that
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index fd814459d614b..d45f43168c563 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -1153,6 +1153,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
   // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // OGCG:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
+  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_add_and_fetch(&sc, uc);
 
   // CHECK: [[RES1:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
@@ -1161,6 +1165,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES1:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = add i8 [[RES1]], [[VAL1]]
   // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES1:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // OGCG:  [[RET1:%.*]] = add i8 [[RES1]], [[VAL1]]
+  // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_add_and_fetch(&uc, uc);
 
   // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
@@ -1171,6 +1179,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES2:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
   // LLVM:  [[RET2:%.*]] = add i16 [[RES2]], [[CONV2]]
   // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  // OGCG:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // OGCG:  [[RES2:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // OGCG:  [[RET2:%.*]] = add i16 [[RES2]], [[CONV2]]
+  // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_add_and_fetch(&ss, uc);
 
   // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
@@ -1181,6 +1194,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES3:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
   // LLVM:  [[RET3:%.*]] = add i16 [[RES3]], [[CONV3]]
   // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  // OGCG:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // OGCG:  [[RES3:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // OGCG:  [[RET3:%.*]] = add i16 [[RES3]], [[CONV3]]
+  // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_add_and_fetch(&us, uc);
 
   // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
@@ -1191,6 +1209,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES4:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
   // LLVM:  [[RET4:%.*]] = add i32 [[RES4]], [[CONV4]]
   // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // OGCG:  [[RES4:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // OGCG:  [[RET4:%.*]] = add i32 [[RES4]], [[CONV4]]
+  // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_add_and_fetch(&si, uc);
 
   // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
@@ -1201,6 +1224,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES5:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
   // LLVM:  [[RET5:%.*]] = add i32 [[RES5]], [[CONV5]]
   // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // OGCG:  [[RES5:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // OGCG:  [[RET5:%.*]] = add i32 [[RES5]], [[CONV5]]
+  // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_add_and_fetch(&ui, uc);
 
   // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
@@ -1211,6 +1239,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES6:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
   // LLVM:  [[RET6:%.*]] = add i64 [[RES6]], [[CONV6]]
   // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // OGCG:  [[RES6:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // OGCG:  [[RET6:%.*]] = add i64 [[RES6]], [[CONV6]]
+  // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_add_and_fetch(&sll, uc);
 
   // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
@@ -1221,6 +1254,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES7:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
   // LLVM:  [[RET7:%.*]] = add i64 [[RES7]], [[CONV7]]
   // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // OGCG:  [[RES7:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // OGCG:  [[RET7:%.*]] = add i64 [[RES7]], [[CONV7]]
+  // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_add_and_fetch(&ull, uc);
 
   // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
@@ -1230,6 +1268,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES0:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = sub i8 [[RES0]], [[VAL0]]
   // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES0:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // OGCG:  [[RET0:%.*]] = sub i8 [[RES0]], [[VAL0]]
+  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_sub_and_fetch(&sc, uc);
 
   // CHECK: [[RES1:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
@@ -1238,6 +1280,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES1:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = sub i8 [[RES1]], [[VAL1]]
   // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES1:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // OGCG:  [[RET1:%.*]] = sub i8 [[RES1]], [[VAL1]]
+  // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_sub_and_fetch(&uc, uc);
 
   // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
@@ -1248,6 +1294,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES2:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
   // LLVM:  [[RET2:%.*]] = sub i16 [[RES2]], [[CONV2]]
   // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  // OGCG:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // OGCG:  [[RES2:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // OGCG:  [[RET2:%.*]] = sub i16 [[RES2]], [[CONV2]]
+  // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_sub_and_fetch(&ss, uc);
 
   // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
@@ -1258,6 +1309,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES3:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
   // LLVM:  [[RET3:%.*]] = sub i16 [[RES3]], [[CONV3]]
   // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  // OGCG:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // OGCG:  [[RES3:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // OGCG:  [[RET3:%.*]] = sub i16 [[RES3]], [[CONV3]]
+  // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_sub_and_fetch(&us, uc);
 
   // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
@@ -1267,7 +1323,11 @@ void test_op_and_fetch() {
   // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
   // LLVM:  [[RES4:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
   // LLVM:  [[RET4:%.*]] = sub i32 [[RES4]], [[CONV4]]
-  // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // OGCG:  [[RES4:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // OGCG:  [[RET4:%.*]] = sub i32 [[RES4]], [[CONV4]]
+  // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_sub_and_fetch(&si, uc);
 
   // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
@@ -1278,6 +1338,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES5:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
   // LLVM:  [[RET5:%.*]] = sub i32 [[RES5]], [[CONV5]]
   // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // OGCG:  [[RES5:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // OGCG:  [[RET5:%.*]] = sub i32 [[RES5]], [[CONV5]]
+  // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_sub_and_fetch(&ui, uc);
 
   // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
@@ -1288,6 +1353,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES6:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
   // LLVM:  [[RET6:%.*]] = sub i64 [[RES6]], [[CONV6]]
   // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // OGCG:  [[RES6:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // OGCG:  [[RET6:%.*]] = sub i64 [[RES6]], [[CONV6]]
+  // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_sub_and_fetch(&sll, uc);
 
   // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
@@ -1298,6 +1368,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES7:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
   // LLVM:  [[RET7:%.*]] = sub i64 [[RES7]], [[CONV7]]
   // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // OGCG:  [[RES7:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // OGCG:  [[RET7:%.*]] = sub i64 [[RES7]], [[CONV7]]
+  // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_sub_and_fetch(&ull, uc);
 
   // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
@@ -1307,6 +1382,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES0:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = and i8 [[RES0]], [[VAL0]]
   // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES0:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // OGCG:  [[RET0:%.*]] = and i8 [[RES0]], [[VAL0]]
+  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_and_and_fetch(&sc, uc);
 
   // CHECK: [[RES1:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
@@ -1315,6 +1394,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES1:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = and i8 [[RES1]], [[VAL1]]
   // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES1:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // OGCG:  [[RET1:%.*]] = and i8 [[RES1]], [[VAL1]]
+  // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_and_and_fetch(&uc, uc);
 
   // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
@@ -1325,6 +1408,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES2:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
   // LLVM:  [[RET2:%.*]] = and i16 [[RES2]], [[CONV2]]
   // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  // OGCG:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // OGCG:  [[RES2:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // OGCG:  [[RET2:%.*]] = and i16 [[RES2]], [[CONV2]]
+  // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_and_and_fetch(&ss, uc);
 
   // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
@@ -1335,6 +1423,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES3:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
   // LLVM:  [[RET3:%.*]] = and i16 [[RES3]], [[CONV3]]
   // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  // OGCG:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // OGCG:  [[RES3:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // OGCG:  [[RET3:%.*]] = and i16 [[RES3]], [[CONV3]]
+  // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_and_and_fetch(&us, uc);
 
   // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
@@ -1345,6 +1438,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES4:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
   // LLVM:  [[RET4:%.*]] = and i32 [[RES4]], [[CONV4]]
   // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // OGCG:  [[RES4:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // OGCG:  [[RET4:%.*]] = and i32 [[RES4]], [[CONV4]]
+  // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_and_and_fetch(&si, uc);
 
   // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
@@ -1355,6 +1453,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES5:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
   // LLVM:  [[RET5:%.*]] = and i32 [[RES5]], [[CONV5]]
   // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // OGCG:  [[RES5:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // OGCG:  [[RET5:%.*]] = and i32 [[RES5]], [[CONV5]]
+  // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_and_and_fetch(&ui, uc);
 
   // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
@@ -1365,6 +1468,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES6:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
   // LLVM:  [[RET6:%.*]] = and i64 [[RES6]], [[CONV6]]
   // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // OGCG:  [[RES6:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // OGCG:  [[RET6:%.*]] = and i64 [[RES6]], [[CONV6]]
+  // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_and_and_fetch(&sll, uc);
 
   // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
@@ -1375,6 +1483,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES7:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
   // LLVM:  [[RET7:%.*]] = and i64 [[RES7]], [[CONV7]]
   // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // OGCG:  [[RES7:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // OGCG:  [[RET7:%.*]] = and i64 [[RES7]], [[CONV7]]
+  // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_and_and_fetch(&ull, uc);
 
   // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
@@ -1384,6 +1497,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES0:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = or i8 [[RES0]], [[VAL0]]
   // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES0:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // OGCG:  [[RET0:%.*]] = or i8 [[RES0]], [[VAL0]]
+  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_or_and_fetch(&sc, uc);
 
   // CHECK: [[RES1:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
@@ -1392,6 +1509,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES1:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = or i8 [[RES1]], [[VAL1]]
   // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES1:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // OGCG:  [[RET1:%.*]] = or i8 [[RES1]], [[VAL1]]
+  // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_or_and_fetch(&uc, uc);
 
   // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
@@ -1402,6 +1523,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES2:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
   // LLVM:  [[RET2:%.*]] = or i16 [[RES2]], [[CONV2]]
   // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  // OGCG:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // OGCG:  [[RES2:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // OGCG:  [[RET2:%.*]] = or i16 [[RES2]], [[CONV2]]
+  // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_or_and_fetch(&ss, uc);
 
   // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
@@ -1412,6 +1538,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES3:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
   // LLVM:  [[RET3:%.*]] = or i16 [[RES3]], [[CONV3]]
   // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  // OGCG:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // OGCG:  [[RES3:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // OGCG:  [[RET3:%.*]] = or i16 [[RES3]], [[CONV3]]
+  // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_or_and_fetch(&us, uc);
 
   // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
@@ -1422,6 +1553,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES4:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
   // LLVM:  [[RET4:%.*]] = or i32 [[RES4]], [[CONV4]]
   // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // OGCG:  [[RES4:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // OGCG:  [[RET4:%.*]] = or i32 [[RES4]], [[CONV4]]
+  // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_or_and_fetch(&si, uc);
 
   // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
@@ -1432,6 +1568,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES5:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
   // LLVM:  [[RET5:%.*]] = or i32 [[RES5]], [[CONV5]]
   // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // OGCG:  [[RES5:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // OGCG:  [[RET5:%.*]] = or i32 [[RES5]], [[CONV5]]
+  // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_or_and_fetch(&ui, uc);
 
   // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
@@ -1442,6 +1583,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES6:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
   // LLVM:  [[RET6:%.*]] = or i64 [[RES6]], [[CONV6]]
   // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // OGCG:  [[RES6:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // OGCG:  [[RET6:%.*]] = or i64 [[RES6]], [[CONV6]]
+  // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_or_and_fetch(&sll, uc);
 
   // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
@@ -1452,6 +1598,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES7:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
   // LLVM:  [[RET7:%.*]] = or i64 [[RES7]], [[CONV7]]
   // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // OGCG:  [[RES7:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // OGCG:  [[RET7:%.*]] = or i64 [[RES7]], [[CONV7]]
+  // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_or_and_fetch(&ull, uc);
 
   // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
@@ -1461,6 +1612,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES0:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = xor i8 [[RES0]], [[VAL0]]
   // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES0:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // OGCG:  [[RET0:%.*]] = xor i8 [[RES0]], [[VAL0]]
+  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_xor_and_fetch(&sc, uc);
 
   // CHECK: [[RES1:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
@@ -1469,6 +1624,10 @@ void test_op_and_fetch() {
   // LLVM:  [[RES1:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = xor i8 [[RES1]], [[VAL1]]
   // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES1:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // OGCG:  [[RET1:%.*]] = xor i8 [[RES1]], [[VAL1]]
+  // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_xor_and_fetch(&uc, uc);
 
   // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
@@ -1479,6 +1638,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES2:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
   // LLVM:  [[RET2:%.*]] = xor i16 [[RES2]], [[CONV2]]
   // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  // OGCG:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // OGCG:  [[RES2:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // OGCG:  [[RET2:%.*]] = xor i16 [[RES2]], [[CONV2]]
+  // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_xor_and_fetch(&ss, uc);
 
   // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
@@ -1489,6 +1653,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES3:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
   // LLVM:  [[RET3:%.*]] = xor i16 [[RES3]], [[CONV3]]
   // LLVM:  store i16 [[RET3]], ptr %{{.*}}
+  // OGCG:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // OGCG:  [[RES3:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // OGCG:  [[RET3:%.*]] = xor i16 [[RES3]], [[CONV3]]
+  // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_xor_and_fetch(&us, uc);
 
   // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
@@ -1499,6 +1668,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES4:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
   // LLVM:  [[RET4:%.*]] = xor i32 [[RES4]], [[CONV4]]
   // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // OGCG:  [[RES4:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // OGCG:  [[RET4:%.*]] = xor i32 [[RES4]], [[CONV4]]
+  // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_xor_and_fetch(&si, uc);
 
   // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
@@ -1509,6 +1683,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES5:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
   // LLVM:  [[RET5:%.*]] = xor i32 [[RES5]], [[CONV5]]
   // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // OGCG:  [[RES5:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // OGCG:  [[RET5:%.*]] = xor i32 [[RES5]], [[CONV5]]
+  // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_xor_and_fetch(&ui, uc);
 
   // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
@@ -1519,6 +1698,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES6:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
   // LLVM:  [[RET6:%.*]] = xor i64 [[RES6]], [[CONV6]]
   // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // OGCG:  [[RES6:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // OGCG:  [[RET6:%.*]] = xor i64 [[RES6]], [[CONV6]]
+  // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_xor_and_fetch(&sll, uc);
 
   // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
@@ -1529,6 +1713,11 @@ void test_op_and_fetch() {
   // LLVM:  [[RES7:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
   // LLVM:  [[RET7:%.*]] = xor i64 [[RES7]], [[CONV7]]
   // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // OGCG:  [[RES7:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // OGCG:  [[RET7:%.*]] = xor i64 [[RES7]], [[CONV7]]
+  // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_xor_and_fetch(&ull, uc);
 
   // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
@@ -1540,6 +1729,11 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM0:%.*]] = and i8 [[RES0]], [[VAL0]]
   // LLVM:  [[RET0:%.*]] = xor i8 [[INTERM0]], -1
   // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES0:%.*]] = atomicrmw nand ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // OGCG:  [[INTERM0:%.*]] = and i8 [[RES0]], [[VAL0]]
+  // OGCG:  [[RET0:%.*]] = xor i8 [[INTERM0]], -1
+  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_nand_and_fetch(&sc, uc);
 
   // CHECK: [[RES1:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
@@ -1550,6 +1744,11 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM1:%.*]] = and i8 [[RES1]], [[VAL1]]
   // LLVM:  [[RET1:%.*]] = xor i8 [[INTERM1]], -1
   // LLVM:  store i8 [[RET1]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES1:%.*]] = atomicrmw nand ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
+  // OGCG:  [[INTERM1:%.*]] = and i8 [[RES1]], [[VAL1]]
+  // OGCG:  [[RET1:%.*]] = xor i8 [[INTERM1]], -1
+  // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_nand_and_fetch(&uc, uc);
 
   // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
@@ -1562,6 +1761,12 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM2:%.*]] = and i16 [[RES2]], [[CONV2]]
   // LLVM:  [[RET2:%.*]] = xor i16 [[INTERM2]], -1
   // LLVM:  store i16 [[RET2]], ptr %{{.*}}, align 2
+  // OGCG:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
+  // OGCG:  [[RES2:%.*]] = atomicrmw nand ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
+  // OGCG:  [[INTERM2:%.*]] = and i16 [[RES2]], [[CONV2]]
+  // OGCG:  [[RET2:%.*]] = xor i16 [[INTERM2]], -1
+  // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_nand_and_fetch(&ss, uc);
 
   // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
@@ -1574,6 +1779,12 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM3:%.*]] = and i16 [[RES3]], [[CONV3]]
   // LLVM:  [[RET3:%.*]] = xor i16 [[INTERM3]], -1
   // LLVM:  store i16 [[RET3]], ptr %{{.*}}, align 2
+  // OGCG:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
+  // OGCG:  [[RES3:%.*]] = atomicrmw nand ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
+  // OGCG:  [[INTERM3:%.*]] = and i16 [[RES3]], [[CONV3]]
+  // OGCG:  [[RET3:%.*]] = xor i16 [[INTERM3]], -1
+  // OGCG:  store i16 [[RET3]], ptr %{{.*}}, align 2
   us = __sync_nand_and_fetch(&us, uc);
 
   // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
@@ -1586,6 +1797,12 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM4:%.*]] = and i32 [[RES4]], [[CONV4]]
   // LLVM:  [[RET4:%.*]] = xor i32 [[INTERM4]], -1
   // LLVM:  store i32 [[RET4]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
+  // OGCG:  [[RES4:%.*]] = atomicrmw nand ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
+  // OGCG:  [[INTERM4:%.*]] = and i32 [[RES4]], [[CONV4]]
+  // OGCG:  [[RET4:%.*]] = xor i32 [[INTERM4]], -1
+  // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_nand_and_fetch(&si, uc);
 
   // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
@@ -1598,6 +1815,12 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM5:%.*]] = and i32 [[RES5]], [[CONV5]]
   // LLVM:  [[RET5:%.*]] = xor i32 [[INTERM5]], -1
   // LLVM:  store i32 [[RET5]], ptr %{{.*}}, align 4
+  // OGCG:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
+  // OGCG:  [[RES5:%.*]] = atomicrmw nand ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
+  // OGCG:  [[INTERM5:%.*]] = and i32 [[RES5]], [[CONV5]]
+  // OGCG:  [[RET5:%.*]] = xor i32 [[INTERM5]], -1
+  // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_nand_and_fetch(&ui, uc);
 
   // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
@@ -1610,6 +1833,12 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM6:%.*]] = and i64 [[RES6]], [[CONV6]]
   // LLVM:  [[RET6:%.*]] = xor i64 [[INTERM6]], -1
   // LLVM:  store i64 [[RET6]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
+  // OGCG:  [[RES6:%.*]] = atomicrmw nand ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
+  // OGCG:  [[INTERM6:%.*]] = and i64 [[RES6]], [[CONV6]]
+  // OGCG:  [[RET6:%.*]] = xor i64 [[INTERM6]], -1
+  // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_nand_and_fetch(&sll, uc);
 
   // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
@@ -1622,5 +1851,11 @@ void test_op_and_fetch() {
   // LLVM:  [[INTERM7:%.*]] = and i64 [[RES7]], [[CONV7]]
   // LLVM:  [[RET7:%.*]] = xor i64 [[INTERM7]], -1
   // LLVM:  store i64 [[RET7]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
+  // OGCG:  [[RES7:%.*]] = atomicrmw nand ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
+  // OGCG:  [[INTERM7:%.*]] = and i64 [[RES7]], [[CONV7]]
+  // OGCG:  [[RET7:%.*]] = xor i64 [[INTERM7]], -1
+  // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_nand_and_fetch(&ull, uc);
 }
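
For reference, the semantics these <op>_and_fetch checks pin down: the
builtin returns the *new* value, while the underlying atomicrmw returns the
old one, so codegen re-applies the operation after the fetch (and for nand
additionally inverts). A minimal user-level sketch, assuming the standard
GCC/Clang __sync semantics; this is illustration only, not part of the patch:

  /* Sketch: user-facing behavior of the builtins exercised by
     test_op_and_fetch above. */
  #include <stdio.h>

  int main(void) {
    unsigned char uc = 5;
    /* atomicrmw add fetches the old value (5); the builtin yields the
       new one (8). */
    unsigned char add_res = __sync_add_and_fetch(&uc, 3);

    unsigned char nc = 0xF0;
    /* nand computes ~(old & val): ~(0xF0 & 0x3C) = 0xCF, matching the
       and-then-xor(-1) sequence the checks above look for. */
    unsigned char nand_res = __sync_nand_and_fetch(&nc, 0x3C);

    printf("%u %u\n", (unsigned)add_res, (unsigned)nand_res); /* 8 207 */
    return 0;
  }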

>From 0928406fbebc83c8fe395aa7ec74b44088cc1688 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Wed, 26 Nov 2025 22:04:29 +0100
Subject: [PATCH 03/10] fmt

---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp  |  5 ++---
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp     |  3 ++-
 clang/lib/CIR/CodeGen/CIRGenFunction.cpp |  2 +-
 clang/lib/CIR/CodeGen/CIRGenFunction.h   |  3 +--
 clang/lib/CIR/CodeGen/CIRGenTypes.cpp    | 11 ++++++-----
 clang/lib/CIR/CodeGen/CIRGenTypes.h      |  3 +--
 6 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 490b34ac29560..01a74cb0075a2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -90,7 +90,7 @@ static Address checkAtomicAlignment(CIRGenFunction &cgf, const CallExpr *e) {
       mlir::isa<cir::PointerType>(ptr.getElementType())
           ? astContext.getTypeSizeInChars(astContext.VoidPtrTy).getQuantity()
           : cgf.cgm.getDataLayout().getTypeSizeInBits(ptr.getElementType()) /
-                                 cgf.cgm.getASTContext().getCharWidth();
+                cgf.cgm.getASTContext().getCharWidth();
 
   unsigned align = ptr.getAlignment().getQuantity();
   if (align % bytes != 0) {
@@ -673,8 +673,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__sync_fetch_and_max:
   case Builtin::BI__sync_fetch_and_umin:
   case Builtin::BI__sync_fetch_and_umax:
-    cgm.errorNYI(e->getSourceRange(),
-                           "__sync_fetch_and_* builtins NYI");
+    cgm.errorNYI(e->getSourceRange(), "__sync_fetch_and_* builtins NYI");
     return getUndefRValue(e->getType());
   case Builtin::BI__sync_add_and_fetch_1:
   case Builtin::BI__sync_add_and_fetch_2:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 0390d77776c78..046cc79531adf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -570,7 +570,8 @@ mlir::Value CIRGenFunction::emitFromMemory(mlir::Value value, QualType ty) {
     cgm.errorNYI("emitFromMemory: PackedVectorBoolType");
   }
 
-  if (ty->hasBooleanRepresentation() || ty->isBitIntType() || ty->isExtVectorBoolType()) {
+  if (ty->hasBooleanRepresentation() || ty->isBitIntType() ||
+      ty->isExtVectorBoolType()) {
     mlir::Type resTy = convertType(ty);
     return builder.createIntCast(value, resTy);
   }
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 28f9a01df7b2a..5ce38eb8d16b2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -33,7 +33,7 @@ CIRGenFunction::CIRGenFunction(CIRGenModule &cgm, CIRGenBuilderTy &builder,
 CIRGenFunction::~CIRGenFunction() {}
 
 mlir::Type CIRGenFunction::convertTypeForLoadStore(QualType astType,
-                                                     mlir::Type mlirType) {
+                                                   mlir::Type mlirType) {
   return cgm.getTypes().convertTypeForLoadStore(astType, mlirType);
 }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 8ac9c9b38ce24..a79c04457ea0e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -202,8 +202,7 @@ class CIRGenFunction : public CIRGenTypeCache {
     return convertType(getContext().getTypeDeclType(t));
   }
 
-  mlir::Type convertTypeForLoadStore(QualType astType,
-                                                     mlir::Type mlirType);
+  mlir::Type convertTypeForLoadStore(QualType astType, mlir::Type mlirType);
 
   ///  Return the cir::TypeEvaluationKind of QualType \c type.
   static cir::TypeEvaluationKind getEvaluationKind(clang::QualType type);
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index 3f2b795f734fc..cd7eaa33a6cc1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -118,7 +118,7 @@ std::string CIRGenTypes::getRecordTypeName(const clang::RecordDecl *recordDecl,
 }
 
 mlir::Type CIRGenTypes::convertTypeForLoadStore(QualType qualType,
-                                                 mlir::Type mlirType) {
+                                                mlir::Type mlirType) {
   if (!mlirType) {
     convertType(qualType);
 
@@ -126,14 +126,15 @@ mlir::Type CIRGenTypes::convertTypeForLoadStore(QualType qualType,
         mlir::cast<mlir::IntegerType>(mlirType).getWidth() == 1)
       return mlir::IntegerType::get(&getMLIRContext(),
                                     astContext.getTypeSize(qualType));
-    
-     return mlirType;
+
+    return mlirType;
   }
 
   if (qualType->isBitIntType())
     return mlir::IntegerType::get(
-        &getMLIRContext(), astContext.getTypeSizeInChars(qualType).getQuantity() * astContext.getCharWidth());
-
+        &getMLIRContext(),
+        astContext.getTypeSizeInChars(qualType).getQuantity() *
+            astContext.getCharWidth());
 
   if (qualType->isExtVectorBoolType())
     return convertTypeForMem(qualType);
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h
index 027b26f037924..97ddc261b2bd9 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h
@@ -118,8 +118,7 @@ class CIRGenTypes {
   std::string getRecordTypeName(const clang::RecordDecl *,
                                 llvm::StringRef suffix);
 
-  mlir::Type convertTypeForLoadStore(QualType qualType,
-                                                 mlir::Type mlirType);
+  mlir::Type convertTypeForLoadStore(QualType qualType, mlir::Type mlirType);
   const CIRGenRecordLayout &getCIRGenRecordLayout(const clang::RecordDecl *rd);
 
   /// Convert type T into an mlir::Type. This differs from convertType in that

>From 46e4b741453b00d7c89bb69ad4156e42773896d0 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Tue, 16 Dec 2025 12:04:16 +0100
Subject: [PATCH 04/10] Fix broken test

---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp |  3 +--
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp    |  3 +++
 clang/lib/CIR/CodeGen/CIRGenTypes.cpp   | 14 ++++++--------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 0945319a3be3a..70e0185ad5f20 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1113,8 +1113,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     // Finally, store the result using the pointer.
     bool isVolatile =
         resultArg->getType()->getPointeeType().isVolatileQualified();
-    builder.createStore(loc, emitToMemory(arithOp.getResult(), resultQTy),
-                        resultPtr, isVolatile);
+    builder.createStore(loc, arithOp.getResult(), resultPtr, isVolatile);
 
     return RValue::get(arithOp.getOverflow());
   }
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 11f7c47785b25..e2feae8429648 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -546,6 +546,9 @@ LValue CIRGenFunction::emitLValueForFieldInitialization(
   return makeAddrLValue(v, fieldType, fieldBaseInfo);
 }
 
+/// Converts a scalar value from its primary IR type (as returned
+/// by ConvertType) to its load/store type (as returned by
+/// convertTypeForLoadStore).
 mlir::Value CIRGenFunction::emitToMemory(mlir::Value value, QualType ty) {
   if (auto *atomicTy = ty->getAs<AtomicType>())
     ty = atomicTy->getValueType();
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index cd7eaa33a6cc1..ee8b5eb8ad722 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -120,14 +120,7 @@ std::string CIRGenTypes::getRecordTypeName(const clang::RecordDecl *recordDecl,
 mlir::Type CIRGenTypes::convertTypeForLoadStore(QualType qualType,
                                                 mlir::Type mlirType) {
   if (!mlirType) {
-    convertType(qualType);
-
-    if (mlir::isa<mlir::IntegerType>(mlirType) &&
-        mlir::cast<mlir::IntegerType>(mlirType).getWidth() == 1)
-      return mlir::IntegerType::get(&getMLIRContext(),
-                                    astContext.getTypeSize(qualType));
-
-    return mlirType;
+    mlirType = convertType(qualType);
   }
 
   if (qualType->isBitIntType())
@@ -136,6 +129,11 @@ mlir::Type CIRGenTypes::convertTypeForLoadStore(QualType qualType,
         astContext.getTypeSizeInChars(qualType).getQuantity() *
             astContext.getCharWidth());
 
+  if (mlir::isa<mlir::IntegerType>(mlirType) &&
+      mlir::cast<mlir::IntegerType>(mlirType).getWidth() == 1)
+    return mlir::IntegerType::get(&getMLIRContext(),
+                                  astContext.getTypeSize(qualType));
+
   if (qualType->isExtVectorBoolType())
     return convertTypeForMem(qualType);
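
The functional fix in this patch: the previous revision called
convertType(qualType) without keeping its result, so mlirType stayed null
for the integer-width check that followed. A standalone C sketch of that
bug pattern (all names here are invented for illustration and stand in for
convertType()/mlirType):

  /* Hypothetical sketch of the discarded-return-value bug fixed above. */
  #include <stdio.h>

  typedef struct { int width; } IrType;

  static IrType *convert_type(void) {
    static IrType t = { 32 };
    return &t;
  }

  static int load_store_width(IrType *t) {
    if (!t)
      t = convert_type(); /* the buggy form read `convert_type();`,
                             leaving t null for the access below */
    return t->width;
  }

  int main(void) {
    printf("%d\n", load_store_width(NULL)); /* prints: 32 */
    return 0;
  }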
 

>From 41612b3dad16c7a0ba9d073760a3145448704b8d Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sat, 20 Dec 2025 01:54:27 +0100
Subject: [PATCH 05/10] feedback

---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp  | 44 ++++++++++++++++++++----
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp     | 15 ++------
 clang/lib/CIR/CodeGen/CIRGenFunction.cpp |  5 ---
 clang/lib/CIR/CodeGen/CIRGenFunction.h   |  2 --
 clang/lib/CIR/CodeGen/CIRGenTypes.cpp    | 23 -------------
 clang/lib/CIR/CodeGen/CIRGenTypes.h      |  1 -
 clang/test/CIR/CodeGen/atomic.c          | 14 ++++++++
 7 files changed, 54 insertions(+), 50 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 70e0185ad5f20..f2226cdd02f50 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -108,7 +108,7 @@ static Address checkAtomicAlignment(CIRGenFunction &cgf, const CallExpr *e) {
 /// and the expression node.
 static mlir::Value makeBinaryAtomicValue(
     CIRGenFunction &cgf, cir::AtomicFetchKind kind, const CallExpr *expr,
-    mlir::Value *neededValP = nullptr,
+    mlir::Type *originalArgType, mlir::Value *emittedArgValue = nullptr,
     cir::MemOrder ordering = cir::MemOrder::SequentiallyConsistent) {
 
   QualType type = expr->getType();
@@ -134,8 +134,10 @@ static mlir::Value makeBinaryAtomicValue(
   // that calculate the result of the operation as return value of
   // <binop>_and_fetch builtins. The `AtomicFetch` operation only updates the
   // memory location and returns the old value.
-  if (neededValP)
-    *neededValP = val;
+  if (emittedArgValue) {
+    *emittedArgValue = val;
+    *originalArgType = valueType;
+  }
 
   auto rmwi = cir::AtomicFetchOp::create(
       builder, cgf.getLoc(expr->getSourceRange()), destAddr.emitRawPointer(),
@@ -148,17 +150,20 @@ static RValue emitBinaryAtomicPost(CIRGenFunction &cgf,
                                    cir::AtomicFetchKind atomicOpkind,
                                    const CallExpr *e, cir::BinOpKind binopKind,
                                    bool invert = false) {
-  mlir::Value val;
+  mlir::Value emittedArgValue;
+  mlir::Type originalArgType;
   clang::QualType typ = e->getType();
-  mlir::Value result = makeBinaryAtomicValue(cgf, atomicOpkind, e, &val);
+  mlir::Value result = makeBinaryAtomicValue(
+      cgf, atomicOpkind, e, &originalArgType, &emittedArgValue);
   clang::CIRGen::CIRGenBuilderTy &builder = cgf.getBuilder();
-  result = cir::BinOp::create(builder, result.getLoc(), binopKind, result, val);
+  result = cir::BinOp::create(builder, result.getLoc(), binopKind, result,
+                              emittedArgValue);
 
   if (invert)
     result = cir::UnaryOp::create(builder, result.getLoc(),
                                   cir::UnaryOpKind::Not, result);
 
-  result = emitFromInt(cgf, result, typ, val.getType());
+  result = emitFromInt(cgf, result, typ, originalArgType);
   return RValue::get(result);
 }
 
@@ -1012,6 +1017,31 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__sync_nand_and_fetch_16:
     return emitBinaryAtomicPost(*this, cir::AtomicFetchKind::Nand, e,
                                 cir::BinOpKind::And, true);
+  case Builtin::BI__sync_val_compare_and_swap_1:
+  case Builtin::BI__sync_val_compare_and_swap_2:
+  case Builtin::BI__sync_val_compare_and_swap_4:
+  case Builtin::BI__sync_val_compare_and_swap_8:
+  case Builtin::BI__sync_val_compare_and_swap_16:
+  case Builtin::BI__sync_bool_compare_and_swap_1:
+  case Builtin::BI__sync_bool_compare_and_swap_2:
+  case Builtin::BI__sync_bool_compare_and_swap_4:
+  case Builtin::BI__sync_bool_compare_and_swap_8:
+  case Builtin::BI__sync_bool_compare_and_swap_16:
+  case Builtin::BI__sync_swap_1:
+  case Builtin::BI__sync_swap_2:
+  case Builtin::BI__sync_swap_4:
+  case Builtin::BI__sync_swap_8:
+  case Builtin::BI__sync_swap_16:
+  case Builtin::BI__sync_lock_test_and_set_1:
+  case Builtin::BI__sync_lock_test_and_set_2:
+  case Builtin::BI__sync_lock_test_and_set_4:
+  case Builtin::BI__sync_lock_test_and_set_8:
+  case Builtin::BI__sync_lock_test_and_set_16:
+  case Builtin::BI__sync_lock_release_1:
+  case Builtin::BI__sync_lock_release_2:
+  case Builtin::BI__sync_lock_release_4:
+  case Builtin::BI__sync_lock_release_8:
+  case Builtin::BI__sync_lock_release_16:
   case Builtin::BI__sync_synchronize:
   case Builtin::BI__builtin_nontemporal_load:
   case Builtin::BI__builtin_nontemporal_store:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index e2feae8429648..a293974ce4bd5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -547,8 +547,7 @@ LValue CIRGenFunction::emitLValueForFieldInitialization(
 }
 
 /// Converts a scalar value from its primary IR type (as returned
-/// by ConvertType) to its load/store type (as returned by
-/// convertTypeForLoadStore).
+/// by ConvertType) to its load/store type.
 mlir::Value CIRGenFunction::emitToMemory(mlir::Value value, QualType ty) {
   if (auto *atomicTy = ty->getAs<AtomicType>())
     ty = atomicTy->getValueType();
@@ -557,10 +556,8 @@ mlir::Value CIRGenFunction::emitToMemory(mlir::Value value, QualType ty) {
     cgm.errorNYI("emitToMemory: extVectorBoolType");
   }
 
-  if (ty->hasBooleanRepresentation() || ty->isBitIntType()) {
-    mlir::Type storeType = convertTypeForLoadStore(ty, value.getType());
-    return builder.createIntCast(value, storeType);
-  }
+  // Unlike in classic codegen, in CIR bools are kept as `cir.bool` and
+  // BitInts are kept as `cir.int<N>` until further lowering.
 
   return value;
 }
@@ -573,12 +570,6 @@ mlir::Value CIRGenFunction::emitFromMemory(mlir::Value value, QualType ty) {
     cgm.errorNYI("emitFromMemory: PackedVectorBoolType");
   }
 
-  if (ty->hasBooleanRepresentation() || ty->isBitIntType() ||
-      ty->isExtVectorBoolType()) {
-    mlir::Type resTy = convertType(ty);
-    return builder.createIntCast(value, resTy);
-  }
-
   return value;
 }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 40f08eaca3ab7..22128ed3521f8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -32,11 +32,6 @@ CIRGenFunction::CIRGenFunction(CIRGenModule &cgm, CIRGenBuilderTy &builder,
 
 CIRGenFunction::~CIRGenFunction() {}
 
-mlir::Type CIRGenFunction::convertTypeForLoadStore(QualType astType,
-                                                   mlir::Type mlirType) {
-  return cgm.getTypes().convertTypeForLoadStore(astType, mlirType);
-}
-
 // This is copied from clang/lib/CodeGen/CodeGenFunction.cpp
 cir::TypeEvaluationKind CIRGenFunction::getEvaluationKind(QualType type) {
   type = type.getCanonicalType();
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index f5cea14166794..2075a7b419d28 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -203,8 +203,6 @@ class CIRGenFunction : public CIRGenTypeCache {
     return convertType(getContext().getTypeDeclType(t));
   }
 
-  mlir::Type convertTypeForLoadStore(QualType astType, mlir::Type mlirType);
-
   ///  Return the cir::TypeEvaluationKind of QualType \c type.
   static cir::TypeEvaluationKind getEvaluationKind(clang::QualType type);
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
index ee8b5eb8ad722..05eb9153060e3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.cpp
@@ -117,29 +117,6 @@ std::string CIRGenTypes::getRecordTypeName(const clang::RecordDecl *recordDecl,
   return builder.getUniqueRecordName(std::string(typeName));
 }
 
-mlir::Type CIRGenTypes::convertTypeForLoadStore(QualType qualType,
-                                                mlir::Type mlirType) {
-  if (!mlirType) {
-    mlirType = convertType(qualType);
-  }
-
-  if (qualType->isBitIntType())
-    return mlir::IntegerType::get(
-        &getMLIRContext(),
-        astContext.getTypeSizeInChars(qualType).getQuantity() *
-            astContext.getCharWidth());
-
-  if (mlir::isa<mlir::IntegerType>(mlirType) &&
-      mlir::cast<mlir::IntegerType>(mlirType).getWidth() == 1)
-    return mlir::IntegerType::get(&getMLIRContext(),
-                                  astContext.getTypeSize(qualType));
-
-  if (qualType->isExtVectorBoolType())
-    return convertTypeForMem(qualType);
-
-  return mlirType;
-}
-
 /// Return true if the specified type is already completely laid out.
 bool CIRGenTypes::isRecordLayoutComplete(const Type *ty) const {
   const auto it = recordDeclTypes.find(ty);
diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h
index 97ddc261b2bd9..e79cdfc9f8224 100644
--- a/clang/lib/CIR/CodeGen/CIRGenTypes.h
+++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h
@@ -118,7 +118,6 @@ class CIRGenTypes {
   std::string getRecordTypeName(const clang::RecordDecl *,
                                 llvm::StringRef suffix);
 
-  mlir::Type convertTypeForLoadStore(QualType qualType, mlir::Type mlirType);
   const CIRGenRecordLayout &getCIRGenRecordLayout(const clang::RecordDecl *rd);
 
   /// Convert type T into an mlir::Type. This differs from convertType in that
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index 03380898f09c0..b057bc55d0bac 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -1137,6 +1137,7 @@ int c11_atomic_fetch_nand(_Atomic(int) *ptr, int value) {
 // CHECK-LABEL: @test_op_and_fetch
 // LLVM-LABEL: @test_op_and_fetch
 void test_op_and_fetch() {
+  int *ptr;
   signed char sc;
   unsigned char uc;
   signed short ss;
@@ -1146,6 +1147,19 @@ void test_op_and_fetch() {
   signed long long sll;
   unsigned long long ull;
 
+  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
+  // CHECK: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
+  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // LLVM:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // LLVM:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
+  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
+  // OGCG:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
+  // OGCG:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
+  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  ptr = __sync_add_and_fetch(&ptr, ptr);
+
   // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
   // CHECK: [[RES0:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
   // CHECK: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
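
For context on the builtins newly enumerated in the switch above
(compare-and-swap, swap, lock test-and-set, lock release), a quick sketch
of their standard GCC/Clang user-facing semantics; a refresher only, not
part of the patch:

  #include <stdio.h>

  int main(void) {
    int v = 10;

    /* Returns the old value; stores 11 only if v == 10. */
    int old = __sync_val_compare_and_swap(&v, 10, 11);

    /* Returns nonzero iff the store happened. */
    int ok = __sync_bool_compare_and_swap(&v, 11, 12);

    /* Atomic exchange (acquire barrier); returns the previous value. */
    int prev = __sync_lock_test_and_set(&v, 99);

    /* Release barrier; stores 0. */
    __sync_lock_release(&v);

    printf("%d %d %d %d\n", old, ok, prev, v); /* prints: 10 1 12 0 */
    return 0;
  }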

>From 46305e0c5262de46c1c1fb4a62f0d81196e42195 Mon Sep 17 00:00:00 2001
From: Hendrik Hübner <117831077+HendrikHuebner at users.noreply.github.com>
Date: Tue, 6 Jan 2026 16:00:46 +0100
Subject: [PATCH 06/10] rename parameter

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 clang/lib/CIR/CodeGen/Address.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h
index 02a24a86b3c84..f32cde957cd5c 100644
--- a/clang/lib/CIR/CodeGen/Address.h
+++ b/clang/lib/CIR/CodeGen/Address.h
@@ -48,8 +48,8 @@ class Address {
       : Address(pointer, elementType, alignment, false) {}
 
   Address(mlir::Value pointer, mlir::Type elementType,
-          clang::CharUnits alignment, bool pointerAndKnownNonNull)
-      : pointerAndKnownNonNull(pointer, pointerAndKnownNonNull),
+          clang::CharUnits alignment, bool isKnownNonNull)
+      : pointerAndKnownNonNull(pointer, isKnownNonNull),
         elementType(elementType), alignment(alignment) {
     assert(pointer && "Pointer cannot be null");
     assert(elementType && "Element type cannot be null");

>From bd1c677d33e6c25b4a8fd341a7d527a22553b5e0 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sun, 11 Jan 2026 16:05:09 +0100
Subject: [PATCH 07/10] fix

---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp |  27 +-
 clang/test/CIR/CodeGen/atomic.c         | 323 ++++++++++++------------
 2 files changed, 185 insertions(+), 165 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 81fc4c854e21e..66b95486fd647 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -122,13 +122,26 @@ static mlir::Value makeBinaryAtomicValue(
 
   Address destAddr = checkAtomicAlignment(cgf, expr);
   CIRGenBuilderTy &builder = cgf.getBuilder();
-  cir::IntType intType =
-      ptrType->getPointeeType()->isUnsignedIntegerType()
-          ? builder.getUIntNTy(cgf.getContext().getTypeSize(type))
-          : builder.getSIntNTy(cgf.getContext().getTypeSize(type));
+
   mlir::Value val = cgf.emitScalarExpr(expr->getArg(1));
   mlir::Type valueType = val.getType();
-  val = emitToInt(cgf, val, type, intType);
+  mlir::Value destValue = destAddr.emitRawPointer();
+
+  if (ptrType->getPointeeType()->isPointerType()) {
+    // Pointer-to-pointer operand: `cir.atomic.fetch` expects a pointer to
+    // an integer type, so we cast ptr<ptr<T>> to a pointer to a
+    // pointer-sized integer.
+    cir::IntType ptrSizeInt = builder.getSIntNTy(cgf.getContext().getTypeSize(ptrType));
+    destValue = builder.createBitcast(destValue, builder.getPointerTo(ptrSizeInt));
+    val = emitToInt(cgf, val, type, ptrSizeInt);
+  } else {
+    // Pointer to integer type
+    cir::IntType intType =
+        ptrType->getPointeeType()->isUnsignedIntegerType()
+            ? builder.getUIntNTy(cgf.getContext().getTypeSize(type))
+            : builder.getSIntNTy(cgf.getContext().getTypeSize(type));
+    val = emitToInt(cgf, val, type, intType);
+  }
 
   // This output argument is needed for post atomic fetch operations
   // that calculate the result of the operation as return value of
@@ -140,10 +153,10 @@ static mlir::Value makeBinaryAtomicValue(
   }
 
   auto rmwi = cir::AtomicFetchOp::create(
-      builder, cgf.getLoc(expr->getSourceRange()), destAddr.emitRawPointer(),
+      builder, cgf.getLoc(expr->getSourceRange()), destValue,
       val, kind, ordering, false, /* is volatile */
       true);                      /* fetch first */
-  return emitFromInt(cgf, rmwi->getResult(0), type, valueType);
+  return rmwi->getResult(0);
 }
 
 static RValue emitBinaryAtomicPost(CIRGenFunction &cgf,
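
Since `cir.atomic.fetch ... fetch_first` (like LLVM's atomicrmw) yields
the value memory held before the operation, the *_and_fetch builtins
recompute the new value with an ordinary binop afterwards, as the
updated checks below show. A minimal C sketch of the same strategy
using the __atomic builtins (illustrative only, not part of the patch):

    /* Equivalent shape for the 8-bit unsigned add case. */
    static unsigned char add_and_fetch_u8(unsigned char *p,
                                          unsigned char v) {
      unsigned char old = __atomic_fetch_add(p, v, __ATOMIC_SEQ_CST);
      return (unsigned char)(old + v); /* old value + operand = new value */
    }
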
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index e26ebc77430f0..b9966faaca837 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -16,7 +16,7 @@ void f1(void) {
 // CIR:       }
 
 // LLVM-LABEL: @f1
-// LLVM:         %[[SLOT:.+]] = alloca i32, i64 1, align 4
+// LLVM:         %[[SLOT:.+]] = allocai32, i64 1, align 4
 // LLVM-NEXT:    store i32 42, ptr %[[SLOT]], align 4
 // LLVM:       }
 
@@ -1147,22 +1147,29 @@ void test_op_and_fetch() {
   signed long long sll;
   unsigned long long ull;
 
-  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
-  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
-  // CHECK: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
-  // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
-  // LLVM:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
-  // LLVM:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
-  // LLVM:  store i8 [[RET0]], ptr %{{.*}}, align 1
-  // OGCG:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
-  // OGCG:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
-  // OGCG:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
-  // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
+  // CIR: [[RES0:%.*]] = cir.load align(8) {{%.*}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
+  // CIR: [[VAL0:%.*]] = cir.cast bitcast {{%.*}} : !cir.ptr<!cir.ptr<!s32i>> -> !cir.ptr<!s64i>
+  // CIR: [[VAL1:%.*]] = cir.cast ptr_to_int {{%.*}} : !cir.ptr<!s32i> -> !s64i
+  // CIR: [[RES1:%.*]] = cir.atomic.fetch add seq_cst fetch_first [[VAL0]], [[VAL1]] : (!cir.ptr<!s64i>, !s64i) -> !s64i
+  // CIR: [[RES2:%.*]] = cir.binop(add, [[RES1]], [[VAL1]]) : !s64i
+  // CIR: [[RES3:%.*]] = cir.cast int_to_ptr [[RES2]] : !s64i -> !cir.ptr<!s32i>
+  // LLVM:  [[VAL0:%.*]] = load ptr, ptr %{{.*}}, align 8
+  // LLVM:  [[VAL1:%.*]] = ptrtoint ptr %{{.*}} to i64
+  // LLVM:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[VAL1]] seq_cst, align 8
+  // LLVM:  [[RET0:%.*]] = add i64 [[RES0]], [[VAL1]]
+  // LLVM:  [[RET1:%.*]] = inttoptr i64 [[RET0]] to ptr
+  // LLVM:  store ptr [[RET1]], ptr %{{.*}}, align 8
+  // OGCG:  [[VAL0:%.*]] = load ptr, ptr %{{.*}}, align 8
+  // OGCG:  [[VAL1:%.*]] = ptrtoint ptr %{{.*}} to i64
+  // OGCG:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[VAL1]] seq_cst, align 8
+  // OGCG:  [[RET0:%.*]] = add i64 [[RES0]], [[VAL1]]
+  // OGCG:  [[RET1:%.*]] = inttoptr i64 [[RET0]] to ptr
+  // OGCG:  store ptr [[RET1]], ptr %{{.*}}, align 8
   ptr = __sync_add_and_fetch(&ptr, ptr);
 
-  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
-  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
-  // CHECK: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
+  // CIR: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CIR: [[RES0:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL0]] : (!cir.ptr<!s8i>, !s8i) -> !s8i
+  // CIR: [[RET0:%.*]] = cir.binop(add, [[RES0]], [[VAL0]]) : !s8i
   // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES0:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = add i8 [[RES0]], [[VAL0]]
@@ -1173,8 +1180,8 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_add_and_fetch(&sc, uc);
 
-  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
-  // CHECK: [[RET1:%.*]] = cir.binop(add, [[RES1]], [[VAL1]]) : !u8i
+  // CIR: [[RES1:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL1:%.*]] : (!cir.ptr<!u8i>, !u8i) -> !u8i
+  // CIR: [[RET1:%.*]] = cir.binop(add, [[RES1]], [[VAL1]]) : !u8i
   // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES1:%.*]] = atomicrmw add ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = add i8 [[RES1]], [[VAL1]]
@@ -1185,9 +1192,9 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_add_and_fetch(&uc, uc);
 
-  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
-  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
-  // CHECK: [[RET2:%.*]] = cir.binop(add, [[RES2]], [[VAL2]]) : !s16i
+  // CIR: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CIR: [[RES2:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL2]] : (!cir.ptr<!s16i>, !s16i) -> !s16i
+  // CIR: [[RET2:%.*]] = cir.binop(add, [[RES2]], [[VAL2]]) : !s16i
   // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
   // LLVM:  [[RES2:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
@@ -1200,9 +1207,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_add_and_fetch(&ss, uc);
 
-  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
-  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
-  // CHECK: [[RET3:%.*]] = cir.binop(add, [[RES3]], [[VAL3]]) : !u16i
+  // CIR: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: [[RES3:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL3]] : (!cir.ptr<!u16i>, !u16i) -> !u16i
+  // CIR: [[RET3:%.*]] = cir.binop(add, [[RES3]], [[VAL3]]) : !u16i
   // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
   // LLVM:  [[RES3:%.*]] = atomicrmw add ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
@@ -1215,9 +1222,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_add_and_fetch(&us, uc);
 
-  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
-  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
-  // CHECK: [[RET4:%.*]] = cir.binop(add, [[RES4]], [[VAL4]]) : !s32i
+  // CIR: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CIR: [[RES4:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL4]] : (!cir.ptr<!s32i>, !s32i) -> !s32i
+  // CIR: [[RET4:%.*]] = cir.binop(add, [[RES4]], [[VAL4]]) : !s32i
   // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
   // LLVM:  [[RES4:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
@@ -1230,9 +1237,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_add_and_fetch(&si, uc);
 
-  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
-  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
-  // CHECK: [[RET5:%.*]] = cir.binop(add, [[RES5]], [[VAL5]]) : !u32i
+  // CIR: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: [[RES5:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL5]] : (!cir.ptr<!u32i>, !u32i) -> !u32i
+  // CIR: [[RET5:%.*]] = cir.binop(add, [[RES5]], [[VAL5]]) : !u32i
   // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
   // LLVM:  [[RES5:%.*]] = atomicrmw add ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
@@ -1245,9 +1252,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_add_and_fetch(&ui, uc);
 
-  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
-  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
-  // CHECK: [[RET6:%.*]] = cir.binop(add, [[RES6]], [[VAL6]]) : !s64i
+  // CIR: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CIR: [[RES6:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL6]] : (!cir.ptr<!s64i>, !s64i) -> !s64i
+  // CIR: [[RET6:%.*]] = cir.binop(add, [[RES6]], [[VAL6]]) : !s64i
   // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
   // LLVM:  [[RES6:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
@@ -1260,9 +1267,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_add_and_fetch(&sll, uc);
 
-  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
-  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(add, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
-  // CHECK: [[RET7:%.*]] = cir.binop(add, [[RES7]], [[VAL7]]) : !u64i
+  // CIR: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: [[RES7:%.*]] = cir.atomic.fetch add seq_cst fetch_first {{%.*}}, [[VAL7]] : (!cir.ptr<!u64i>, !u64i) -> !u64i
+  // CIR: [[RET7:%.*]] = cir.binop(add, [[RES7]], [[VAL7]]) : !u64i
   // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
   // LLVM:  [[RES7:%.*]] = atomicrmw add ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
@@ -1275,9 +1282,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_add_and_fetch(&ull, uc);
 
-  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
-  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
-  // CHECK: [[RET0:%.*]] = cir.binop(sub, [[RES0]], [[VAL0]]) : !s8i
+  // CIR: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CIR: [[RES0:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL0]] : (!cir.ptr<!s8i>, !s8i) -> !s8i
+  // CIR: [[RET0:%.*]] = cir.binop(sub, [[RES0]], [[VAL0]]) : !s8i
   // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES0:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = sub i8 [[RES0]], [[VAL0]]
@@ -1288,8 +1295,8 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_sub_and_fetch(&sc, uc);
 
-  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
-  // CHECK: [[RET1:%.*]] = cir.binop(sub, [[RES1]], [[VAL1]]) : !u8i
+  // CIR: [[RES1:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL1:%.*]] : (!cir.ptr<!u8i>, !u8i) -> !u8i
+  // CIR: [[RET1:%.*]] = cir.binop(sub, [[RES1]], [[VAL1]]) : !u8i
   // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES1:%.*]] = atomicrmw sub ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = sub i8 [[RES1]], [[VAL1]]
@@ -1300,9 +1307,9 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_sub_and_fetch(&uc, uc);
 
-  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
-  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
-  // CHECK: [[RET2:%.*]] = cir.binop(sub, [[RES2]], [[VAL2]]) : !s16i
+  // CIR: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CIR: [[RES2:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL2]] : (!cir.ptr<!s16i>, !s16i) -> !s16i
+  // CIR: [[RET2:%.*]] = cir.binop(sub, [[RES2]], [[VAL2]]) : !s16i
   // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
   // LLVM:  [[RES2:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
@@ -1315,9 +1322,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_sub_and_fetch(&ss, uc);
 
-  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
-  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
-  // CHECK: [[RET3:%.*]] = cir.binop(sub, [[RES3]], [[VAL3]]) : !u16i
+  // CIR: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: [[RES3:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL3]] : (!cir.ptr<!u16i>, !u16i) -> !u16i
+  // CIR: [[RET3:%.*]] = cir.binop(sub, [[RES3]], [[VAL3]]) : !u16i
   // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
   // LLVM:  [[RES3:%.*]] = atomicrmw sub ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
@@ -1330,9 +1337,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_sub_and_fetch(&us, uc);
 
-  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
-  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
-  // CHECK: [[RET4:%.*]] = cir.binop(sub, [[RES4]], [[VAL4]]) : !s32i
+  // CIR: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CIR: [[RES4:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL4]] : (!cir.ptr<!s32i>, !s32i) -> !s32i
+  // CIR: [[RET4:%.*]] = cir.binop(sub, [[RES4]], [[VAL4]]) : !s32i
   // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
   // LLVM:  [[RES4:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
@@ -1344,9 +1351,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_sub_and_fetch(&si, uc);
 
-  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
-  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
-  // CHECK: [[RET5:%.*]] = cir.binop(sub, [[RES5]], [[VAL5]]) : !u32i
+  // CIR: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: [[RES5:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL5]] : (!cir.ptr<!u32i>, !u32i) -> !u32i
+  // CIR: [[RET5:%.*]] = cir.binop(sub, [[RES5]], [[VAL5]]) : !u32i
   // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
   // LLVM:  [[RES5:%.*]] = atomicrmw sub ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
@@ -1359,9 +1366,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_sub_and_fetch(&ui, uc);
 
-  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
-  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
-  // CHECK: [[RET6:%.*]] = cir.binop(sub, [[RES6]], [[VAL6]]) : !s64i
+  // CIR: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CIR: [[RES6:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL6]] : (!cir.ptr<!s64i>, !s64i) -> !s64i
+  // CIR: [[RET6:%.*]] = cir.binop(sub, [[RES6]], [[VAL6]]) : !s64i
   // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
   // LLVM:  [[RES6:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
@@ -1374,9 +1381,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_sub_and_fetch(&sll, uc);
 
-  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
-  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(sub, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
-  // CHECK: [[RET7:%.*]] = cir.binop(sub, [[RES7]], [[VAL7]]) : !u64i
+  // CIR: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: [[RES7:%.*]] = cir.atomic.fetch sub seq_cst fetch_first {{%.*}}, [[VAL7]] : (!cir.ptr<!u64i>, !u64i) -> !u64i
+  // CIR: [[RET7:%.*]] = cir.binop(sub, [[RES7]], [[VAL7]]) : !u64i
   // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
   // LLVM:  [[RES7:%.*]] = atomicrmw sub ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
@@ -1389,9 +1396,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_sub_and_fetch(&ull, uc);
 
-  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
-  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
-  // CHECK: [[RET0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
+  // CIR: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CIR: [[RES0:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL0]] : (!cir.ptr<!s8i>, !s8i) -> !s8i
+  // CIR: [[RET0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
   // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES0:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = and i8 [[RES0]], [[VAL0]]
@@ -1402,8 +1409,8 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_and_and_fetch(&sc, uc);
 
-  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
-  // CHECK: [[RET1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
+  // CIR: [[RES1:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL1:%.*]] : (!cir.ptr<!u8i>, !u8i) -> !u8i
+  // CIR: [[RET1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
   // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES1:%.*]] = atomicrmw and ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = and i8 [[RES1]], [[VAL1]]
@@ -1414,9 +1421,9 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_and_and_fetch(&uc, uc);
 
-  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
-  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
-  // CHECK: [[RET2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
+  // CIR: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CIR: [[RES2:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL2]] : (!cir.ptr<!s16i>, !s16i) -> !s16i
+  // CIR: [[RET2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
   // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
   // LLVM:  [[RES2:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
@@ -1429,9 +1436,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_and_and_fetch(&ss, uc);
 
-  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
-  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
-  // CHECK: [[RET3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
+  // CIR: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: [[RES3:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL3]] : (!cir.ptr<!u16i>, !u16i) -> !u16i
+  // CIR: [[RET3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
   // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
   // LLVM:  [[RES3:%.*]] = atomicrmw and ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
@@ -1444,9 +1451,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_and_and_fetch(&us, uc);
 
-  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
-  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
-  // CHECK: [[RET4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
+  // CIR: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CIR: [[RES4:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL4]] : (!cir.ptr<!s32i>, !s32i) -> !s32i
+  // CIR: [[RET4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
   // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
   // LLVM:  [[RES4:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
@@ -1459,9 +1466,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_and_and_fetch(&si, uc);
 
-  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
-  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
-  // CHECK: [[RET5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
+  // CIR: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: [[RES5:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL5]] : (!cir.ptr<!u32i>, !u32i) -> !u32i
+  // CIR: [[RET5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
   // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
   // LLVM:  [[RES5:%.*]] = atomicrmw and ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
@@ -1474,9 +1481,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_and_and_fetch(&ui, uc);
 
-  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
-  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
-  // CHECK: [[RET6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
+  // CIR: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CIR: [[RES6:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL6]] : (!cir.ptr<!s64i>, !s64i) -> !s64i
+  // CIR: [[RET6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
   // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
   // LLVM:  [[RES6:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
@@ -1489,9 +1496,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_and_and_fetch(&sll, uc);
 
-  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
-  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(and, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
-  // CHECK: [[RET7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
+  // CIR: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: [[RES7:%.*]] = cir.atomic.fetch and seq_cst fetch_first {{%.*}}, [[VAL7]] : (!cir.ptr<!u64i>, !u64i) -> !u64i
+  // CIR: [[RET7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
   // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
   // LLVM:  [[RES7:%.*]] = atomicrmw and ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
@@ -1504,9 +1511,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_and_and_fetch(&ull, uc);
 
-  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
-  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
-  // CHECK: [[RET0:%.*]] = cir.binop(or, [[RES0]], [[VAL0]]) : !s8i
+  // CIR: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CIR: [[RES0:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL0]] : (!cir.ptr<!s8i>, !s8i) -> !s8i
+  // CIR: [[RET0:%.*]] = cir.binop(or, [[RES0]], [[VAL0]]) : !s8i
   // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES0:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = or i8 [[RES0]], [[VAL0]]
@@ -1517,8 +1524,8 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_or_and_fetch(&sc, uc);
 
-  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
-  // CHECK: [[RET1:%.*]] = cir.binop(or, [[RES1]], [[VAL1]]) : !u8i
+  // CIR: [[RES1:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL1:%.*]] : (!cir.ptr<!u8i>, !u8i) -> !u8i
+  // CIR: [[RET1:%.*]] = cir.binop(or, [[RES1]], [[VAL1]]) : !u8i
   // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES1:%.*]] = atomicrmw or ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = or i8 [[RES1]], [[VAL1]]
@@ -1529,9 +1536,9 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_or_and_fetch(&uc, uc);
 
-  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
-  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
-  // CHECK: [[RET2:%.*]] = cir.binop(or, [[RES2]], [[VAL2]]) : !s16i
+  // CIR: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CIR: [[RES2:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL2]] : (!cir.ptr<!s16i>, !s16i) -> !s16i
+  // CIR: [[RET2:%.*]] = cir.binop(or, [[RES2]], [[VAL2]]) : !s16i
   // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
   // LLVM:  [[RES2:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
@@ -1544,9 +1551,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_or_and_fetch(&ss, uc);
 
-  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
-  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
-  // CHECK: [[RET3:%.*]] = cir.binop(or, [[RES3]], [[VAL3]]) : !u16i
+  // CIR: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: [[RES3:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL3]] : (!cir.ptr<!u16i>, !u16i) -> !u16i
+  // CIR: [[RET3:%.*]] = cir.binop(or, [[RES3]], [[VAL3]]) : !u16i
   // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
   // LLVM:  [[RES3:%.*]] = atomicrmw or ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
@@ -1559,9 +1566,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_or_and_fetch(&us, uc);
 
-  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
-  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
-  // CHECK: [[RET4:%.*]] = cir.binop(or, [[RES4]], [[VAL4]]) : !s32i
+  // CIR: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CIR: [[RES4:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL4]] : (!cir.ptr<!s32i>, !s32i) -> !s32i
+  // CIR: [[RET4:%.*]] = cir.binop(or, [[RES4]], [[VAL4]]) : !s32i
   // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
   // LLVM:  [[RES4:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
@@ -1574,9 +1581,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_or_and_fetch(&si, uc);
 
-  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
-  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
-  // CHECK: [[RET5:%.*]] = cir.binop(or, [[RES5]], [[VAL5]]) : !u32i
+  // CIR: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: [[RES5:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL5]] : (!cir.ptr<!u32i>, !u32i) -> !u32i
+  // CIR: [[RET5:%.*]] = cir.binop(or, [[RES5]], [[VAL5]]) : !u32i
   // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
   // LLVM:  [[RES5:%.*]] = atomicrmw or ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
@@ -1589,9 +1596,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_or_and_fetch(&ui, uc);
 
-  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
-  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
-  // CHECK: [[RET6:%.*]] = cir.binop(or, [[RES6]], [[VAL6]]) : !s64i
+  // CIR: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CIR: [[RES6:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL6]] : (!cir.ptr<!s64i>, !s64i) -> !s64i
+  // CIR: [[RET6:%.*]] = cir.binop(or, [[RES6]], [[VAL6]]) : !s64i
   // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
   // LLVM:  [[RES6:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
@@ -1604,9 +1611,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_or_and_fetch(&sll, uc);
 
-  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
-  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(or, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
-  // CHECK: [[RET7:%.*]] = cir.binop(or, [[RES7]], [[VAL7]]) : !u64i
+  // CIR: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: [[RES7:%.*]] = cir.atomic.fetch or seq_cst fetch_first {{%.*}}, [[VAL7]] : (!cir.ptr<!u64i>, !u64i) -> !u64i
+  // CIR: [[RET7:%.*]] = cir.binop(or, [[RES7]], [[VAL7]]) : !u64i
   // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
   // LLVM:  [[RES7:%.*]] = atomicrmw or ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
@@ -1619,9 +1626,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_or_and_fetch(&ull, uc);
 
-  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
-  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
-  // CHECK: [[RET0:%.*]] = cir.binop(xor, [[RES0]], [[VAL0]]) : !s8i
+  // CIR: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CIR: [[RES0:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL0]] : (!cir.ptr<!s8i>, !s8i) -> !s8i
+  // CIR: [[RET0:%.*]] = cir.binop(xor, [[RES0]], [[VAL0]]) : !s8i
   // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES0:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[RET0:%.*]] = xor i8 [[RES0]], [[VAL0]]
@@ -1632,8 +1639,8 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_xor_and_fetch(&sc, uc);
 
-  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
-  // CHECK: [[RET1:%.*]] = cir.binop(xor, [[RES1]], [[VAL1]]) : !u8i
+  // CIR: [[RES1:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL1:%.*]] : (!cir.ptr<!u8i>, !u8i) -> !u8i
+  // CIR: [[RET1:%.*]] = cir.binop(xor, [[RES1]], [[VAL1]]) : !u8i
   // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES1:%.*]] = atomicrmw xor ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[RET1:%.*]] = xor i8 [[RES1]], [[VAL1]]
@@ -1644,9 +1651,9 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_xor_and_fetch(&uc, uc);
 
-  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
-  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
-  // CHECK: [[RET2:%.*]] = cir.binop(xor, [[RES2]], [[VAL2]]) : !s16i
+  // CIR: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CIR: [[RES2:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL2]] : (!cir.ptr<!s16i>, !s16i) -> !s16i
+  // CIR: [[RET2:%.*]] = cir.binop(xor, [[RES2]], [[VAL2]]) : !s16i
   // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
   // LLVM:  [[RES2:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
@@ -1659,9 +1666,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_xor_and_fetch(&ss, uc);
 
-  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
-  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
-  // CHECK: [[RET3:%.*]] = cir.binop(xor, [[RES3]], [[VAL3]]) : !u16i
+  // CIR: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: [[RES3:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL3]] : (!cir.ptr<!u16i>, !u16i) -> !u16i
+  // CIR: [[RET3:%.*]] = cir.binop(xor, [[RES3]], [[VAL3]]) : !u16i
   // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
   // LLVM:  [[RES3:%.*]] = atomicrmw xor ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
@@ -1674,9 +1681,9 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET3]], ptr %{{.*}}
   us = __sync_xor_and_fetch(&us, uc);
 
-  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
-  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
-  // CHECK: [[RET4:%.*]] = cir.binop(xor, [[RES4]], [[VAL4]]) : !s32i
+  // CIR: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CIR: [[RES4:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL4]] : (!cir.ptr<!s32i>, !s32i) -> !s32i
+  // CIR: [[RET4:%.*]] = cir.binop(xor, [[RES4]], [[VAL4]]) : !s32i
   // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
   // LLVM:  [[RES4:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
@@ -1689,9 +1696,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_xor_and_fetch(&si, uc);
 
-  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
-  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
-  // CHECK: [[RET5:%.*]] = cir.binop(xor, [[RES5]], [[VAL5]]) : !u32i
+  // CIR: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: [[RES5:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL5]] : (!cir.ptr<!u32i>, !u32i) -> !u32i
+  // CIR: [[RET5:%.*]] = cir.binop(xor, [[RES5]], [[VAL5]]) : !u32i
   // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
   // LLVM:  [[RES5:%.*]] = atomicrmw xor ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
@@ -1704,9 +1711,9 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_xor_and_fetch(&ui, uc);
 
-  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
-  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
-  // CHECK: [[RET6:%.*]] = cir.binop(xor, [[RES6]], [[VAL6]]) : !s64i
+  // CIR: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CIR: [[RES6:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL6]] : (!cir.ptr<!s64i>, !s64i) -> !s64i
+  // CIR: [[RET6:%.*]] = cir.binop(xor, [[RES6]], [[VAL6]]) : !s64i
   // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
   // LLVM:  [[RES6:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
@@ -1719,9 +1726,9 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_xor_and_fetch(&sll, uc);
 
-  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
-  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(xor, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
-  // CHECK: [[RET7:%.*]] = cir.binop(xor, [[RES7]], [[VAL7]]) : !u64i
+  // CIR: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: [[RES7:%.*]] = cir.atomic.fetch xor seq_cst fetch_first {{%.*}}, [[VAL7]] : (!cir.ptr<!u64i>, !u64i) -> !u64i
+  // CIR: [[RET7:%.*]] = cir.binop(xor, [[RES7]], [[VAL7]]) : !u64i
   // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
   // LLVM:  [[RES7:%.*]] = atomicrmw xor ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
@@ -1734,10 +1741,10 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET7]], ptr %{{.*}}, align 8
   ull = __sync_xor_and_fetch(&ull, uc);
 
-  // CHECK: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
-  // CHECK: [[RES0:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s8i>, [[VAL0]] : !s8i, seq_cst) fetch_first : !s8i
-  // CHECK: [[INTERM0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
-  // CHECK: [[RET0:%.*]] =  cir.unary(not, [[INTERM0]]) : !s8i, !s8i
+  // CIR: [[VAL0:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s8i
+  // CIR: [[RES0:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL0]] : (!cir.ptr<!s8i>, !s8i) -> !s8i
+  // CIR: [[INTERM0:%.*]] = cir.binop(and, [[RES0]], [[VAL0]]) : !s8i
+  // CIR: [[RET0:%.*]] =  cir.unary(not, [[INTERM0]]) : !s8i, !s8i
   // LLVM:  [[VAL0:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES0:%.*]] = atomicrmw nand ptr %{{.*}}, i8 [[VAL0]] seq_cst, align 1
   // LLVM:  [[INTERM0:%.*]] = and i8 [[RES0]], [[VAL0]]
@@ -1750,9 +1757,9 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET0]], ptr %{{.*}}, align 1
   sc = __sync_nand_and_fetch(&sc, uc);
 
-  // CHECK: [[RES1:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u8i>, [[VAL1:%.*]] : !u8i, seq_cst) fetch_first : !u8i
-  // CHECK: [[INTERM1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
-  // CHECK: [[RET1:%.*]] = cir.unary(not, [[INTERM1]]) : !u8i, !u8i
+  // CIR: [[RES1:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL1:%.*]] : (!cir.ptr<!u8i>, !u8i) -> !u8i
+  // CIR: [[INTERM1:%.*]] = cir.binop(and, [[RES1]], [[VAL1]]) : !u8i
+  // CIR: [[RET1:%.*]] = cir.unary(not, [[INTERM1]]) : !u8i, !u8i
   // LLVM:  [[VAL1:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[RES1:%.*]] = atomicrmw nand ptr %{{.*}}, i8 [[VAL1]] seq_cst, align 1
   // LLVM:  [[INTERM1:%.*]] = and i8 [[RES1]], [[VAL1]]
@@ -1765,10 +1772,10 @@ void test_op_and_fetch() {
   // OGCG:  store i8 [[RET1]], ptr %{{.*}}, align 1
   uc = __sync_nand_and_fetch(&uc, uc);
 
-  // CHECK: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
-  // CHECK: [[RES2:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s16i>, [[VAL2]] : !s16i, seq_cst) fetch_first : !s16i
-  // CHECK: [[INTERM2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
-  // CHECK: [[RET2:%.*]] =  cir.unary(not, [[INTERM2]]) : !s16i, !s16i
+  // CIR: [[VAL2:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s16i
+  // CIR: [[RES2:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL2]] : (!cir.ptr<!s16i>, !s16i) -> !s16i
+  // CIR: [[INTERM2:%.*]] = cir.binop(and, [[RES2]], [[VAL2]]) : !s16i
+  // CIR: [[RET2:%.*]] =  cir.unary(not, [[INTERM2]]) : !s16i, !s16i
   // LLVM:  [[VAL2:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV2:%.*]] = zext i8 [[VAL2]] to i16
   // LLVM:  [[RES2:%.*]] = atomicrmw nand ptr %{{.*}}, i16 [[CONV2]] seq_cst, align 2
@@ -1783,10 +1790,10 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET2]], ptr %{{.*}}, align 2
   ss = __sync_nand_and_fetch(&ss, uc);
 
-  // CHECK: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
-  // CHECK: [[RES3:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u16i>, [[VAL3]] : !u16i, seq_cst) fetch_first : !u16i
-  // CHECK: [[INTERM3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
-  // CHECK: [[RET3:%.*]] =  cir.unary(not, [[INTERM3]]) : !u16i, !u16i
+  // CIR: [[VAL3:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: [[RES3:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL3]] : (!cir.ptr<!u16i>, !u16i) -> !u16i
+  // CIR: [[INTERM3:%.*]] = cir.binop(and, [[RES3]], [[VAL3]]) : !u16i
+  // CIR: [[RET3:%.*]] =  cir.unary(not, [[INTERM3]]) : !u16i, !u16i
   // LLVM:  [[VAL3:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV3:%.*]] = zext i8 [[VAL3]] to i16
   // LLVM:  [[RES3:%.*]] = atomicrmw nand ptr %{{.*}}, i16 [[CONV3]] seq_cst, align 2
@@ -1801,10 +1808,10 @@ void test_op_and_fetch() {
   // OGCG:  store i16 [[RET3]], ptr %{{.*}}, align 2
   us = __sync_nand_and_fetch(&us, uc);
 
-  // CHECK: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
-  // CHECK: [[RES4:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s32i>, [[VAL4]] : !s32i, seq_cst) fetch_first : !s32i
-  // CHECK: [[INTERM4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
-  // CHECK: [[RET4:%.*]] =  cir.unary(not, [[INTERM4]]) : !s32i, !s32i
+  // CIR: [[VAL4:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s32i
+  // CIR: [[RES4:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL4]] : (!cir.ptr<!s32i>, !s32i) -> !s32i
+  // CIR: [[INTERM4:%.*]] = cir.binop(and, [[RES4]], [[VAL4]]) : !s32i
+  // CIR: [[RET4:%.*]] =  cir.unary(not, [[INTERM4]]) : !s32i, !s32i
   // LLVM:  [[VAL4:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV4:%.*]] = zext i8 [[VAL4]] to i32
   // LLVM:  [[RES4:%.*]] = atomicrmw nand ptr %{{.*}}, i32 [[CONV4]] seq_cst, align 4
@@ -1819,10 +1826,10 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET4]], ptr %{{.*}}, align 4
   si = __sync_nand_and_fetch(&si, uc);
 
-  // CHECK: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
-  // CHECK: [[RES5:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u32i>, [[VAL5]] : !u32i, seq_cst) fetch_first : !u32i
-  // CHECK: [[INTERM5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
-  // CHECK: [[RET5:%.*]] =  cir.unary(not, [[INTERM5]]) : !u32i, !u32i
+  // CIR: [[VAL5:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: [[RES5:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL5]] : (!cir.ptr<!u32i>, !u32i) -> !u32i
+  // CIR: [[INTERM5:%.*]] = cir.binop(and, [[RES5]], [[VAL5]]) : !u32i
+  // CIR: [[RET5:%.*]] =  cir.unary(not, [[INTERM5]]) : !u32i, !u32i
   // LLVM:  [[VAL5:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV5:%.*]] = zext i8 [[VAL5]] to i32
   // LLVM:  [[RES5:%.*]] = atomicrmw nand ptr %{{.*}}, i32 [[CONV5]] seq_cst, align 4
@@ -1837,10 +1844,10 @@ void test_op_and_fetch() {
   // OGCG:  store i32 [[RET5]], ptr %{{.*}}, align 4
   ui = __sync_nand_and_fetch(&ui, uc);
 
-  // CHECK: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
-  // CHECK: [[RES6:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!s64i>, [[VAL6]] : !s64i, seq_cst) fetch_first : !s64i
-  // CHECK: [[INTERM6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
-  // CHECK: [[RET6:%.*]] =  cir.unary(not, [[INTERM6]]) : !s64i, !s64i
+  // CIR: [[VAL6:%.*]] = cir.cast integral {{%.*}} : !u8i -> !s64i
+  // CIR: [[RES6:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL6]] : (!cir.ptr<!s64i>, !s64i) -> !s64i
+  // CIR: [[INTERM6:%.*]] = cir.binop(and, [[RES6]], [[VAL6]]) : !s64i
+  // CIR: [[RET6:%.*]] =  cir.unary(not, [[INTERM6]]) : !s64i, !s64i
   // LLVM:  [[VAL6:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV6:%.*]] = zext i8 [[VAL6]] to i64
   // LLVM:  [[RES6:%.*]] = atomicrmw nand ptr %{{.*}}, i64 [[CONV6]] seq_cst, align 8
@@ -1855,10 +1862,10 @@ void test_op_and_fetch() {
   // OGCG:  store i64 [[RET6]], ptr %{{.*}}, align 8
   sll = __sync_nand_and_fetch(&sll, uc);
 
-  // CHECK: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
-  // CHECK: [[RES7:%.*]] = cir.atomic.fetch(nand, {{%.*}} : !cir.ptr<!u64i>, [[VAL7]] : !u64i, seq_cst) fetch_first : !u64i
-  // CHECK: [[INTERM7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
-  // CHECK: [[RET7:%.*]] =  cir.unary(not, [[INTERM7]]) : !u64i, !u64i
+  // CIR: [[VAL7:%.*]] = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: [[RES7:%.*]] = cir.atomic.fetch nand seq_cst fetch_first {{%.*}}, [[VAL7]] : (!cir.ptr<!u64i>, !u64i) -> !u64i
+  // CIR: [[INTERM7:%.*]] = cir.binop(and, [[RES7]], [[VAL7]]) : !u64i
+  // CIR: [[RET7:%.*]] =  cir.unary(not, [[INTERM7]]) : !u64i, !u64i
   // LLVM:  [[VAL7:%.*]] = load i8, ptr %{{.*}}, align 1
   // LLVM:  [[CONV7:%.*]] = zext i8 [[VAL7]] to i64
   // LLVM:  [[RES7:%.*]] = atomicrmw nand ptr %{{.*}}, i64 [[CONV7]] seq_cst, align 8
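
The nand cases above need one extra step: the fetch still returns the
old value, but the value atomicrmw nand stores is ~(old & val), so the
lowering recomputes the returned result with binop(and) followed by
unary(not). In C terms (illustrative sketch, not part of the patch):

    static unsigned char nand_and_fetch_u8(unsigned char *p,
                                           unsigned char v) {
      unsigned char old = __atomic_fetch_nand(p, v, __ATOMIC_SEQ_CST);
      return (unsigned char)~(old & v); /* mirrors binop(and) + unary(not) */
    }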

>From c3085d9f64df10f6436e6314d8df1288bdbfdef4 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sun, 11 Jan 2026 16:05:52 +0100
Subject: [PATCH 08/10] Fix alloca spelling in atomic.c LLVM check

---
 clang/test/CIR/CodeGen/atomic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index b9966faaca837..dd27f0463395f 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -16,7 +16,7 @@ void f1(void) {
 // CIR:       }
 
 // LLVM-LABEL: @f1
-// LLVM:         %[[SLOT:.+]] = allocai32, i64 1, align 4
+// LLVM:         %[[SLOT:.+]] = alloca i32, i64 1, align 4
 // LLVM-NEXT:    store i32 42, ptr %[[SLOT]], align 4
 // LLVM:       }
 

>From 23df9204b9194640317460cc70f73930c154bf5e Mon Sep 17 00:00:00 2001
From: Hendrik Hübner
 <117831077+HendrikHuebner at users.noreply.github.com>
Date: Thu, 15 Jan 2026 13:53:15 +0100
Subject: [PATCH 09/10] Use errorBuiltinNYI for unimplemented __sync builtins

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 66b95486fd647..befe37c259315 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -1387,7 +1387,6 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__sync_fetch_and_max:
   case Builtin::BI__sync_fetch_and_umin:
   case Builtin::BI__sync_fetch_and_umax:
-    cgm.errorNYI(e->getSourceRange(), "__sync_fetch_and_* builtins NYI");
-    return getUndefRValue(e->getType());
+    return errorBuiltinNYI(*this, e, builtinID);
   case Builtin::BI__sync_add_and_fetch_1:
   case Builtin::BI__sync_add_and_fetch_2:

>From df2a272a948cf6a62a035dc1fffeb454b507136b Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Fri, 16 Jan 2026 01:45:24 +0100
Subject: [PATCH 10/10] Reformat CIRGenBuiltin.cpp to fit the 80-column limit

---
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index befe37c259315..0e5a5b531df78 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -131,8 +131,10 @@ static mlir::Value makeBinaryAtomicValue(
     // Pointer-to-pointer operand: `cir.atomic.fetch` expects a pointer to
     // an integer type, so we cast ptr<ptr<T>> to a pointer to a
     // pointer-sized integer.
-    cir::IntType ptrSizeInt = builder.getSIntNTy(cgf.getContext().getTypeSize(ptrType));
-    destValue = builder.createBitcast(destValue, builder.getPointerTo(ptrSizeInt));
+    cir::IntType ptrSizeInt =
+        builder.getSIntNTy(cgf.getContext().getTypeSize(ptrType));
+    destValue =
+        builder.createBitcast(destValue, builder.getPointerTo(ptrSizeInt));
     val = emitToInt(cgf, val, type, ptrSizeInt);
   } else {
     // Pointer to integer type
@@ -153,9 +155,9 @@ static mlir::Value makeBinaryAtomicValue(
   }
 
   auto rmwi = cir::AtomicFetchOp::create(
-      builder, cgf.getLoc(expr->getSourceRange()), destValue,
-      val, kind, ordering, false, /* is volatile */
-      true);                      /* fetch first */
+      builder, cgf.getLoc(expr->getSourceRange()), destValue, val, kind,
+      ordering, false, /* is volatile */
+      true);           /* fetch first */
   return rmwi->getResult(0);
 }
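
As the checks throughout this series show, every __sync_<op>_and_fetch
call lowers to a seq_cst atomicrmw, matching the full-barrier semantics
the __sync builtins guarantee; on this path they behave like the
corresponding __atomic_<op>_fetch forms with __ATOMIC_SEQ_CST. Usage
sketch (illustrative only, not part of the patch):

    void sync_vs_atomic(unsigned *x) {
      unsigned a = __sync_xor_and_fetch(x, 0xFFu);
      unsigned b = __atomic_xor_fetch(x, 0xFFu, __ATOMIC_SEQ_CST);
      (void)a;
      (void)b;
    }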
 


