[clang] [OpenACC][CIR] Implement 'atomic capture' lowering (PR #168422)

Mon Nov 17 10:48:26 PST 2025

https://github.com/erichkeane created https://github.com/llvm/llvm-project/pull/168422

The 'atomic capture' variant of the `atomic` construct accepts either a single statement, or a compound statement containing two statements. Each of the statements it accepts meet a form of the previous read/write/update forms, or is a combination of two.

The IR node for atomic capture takes two separate other acc.atomics, plus a terminator.

This patch implements all of the lowering for these.

>From b77f1b21739731393d1916b865ca3479d025a646 Mon Sep 17 00:00:00 2001
From: erichkeane <ekeane at nvidia.com>
Date: Fri, 24 Oct 2025 10:32:21 -0700
Subject: [PATCH] [OpenACC][CIR] Implement 'atomic capture' lowering

The 'atomic capture' variant of the `atomic` construct accepts either a
single statement, or a compound statement containing two statements.
Each of the statements it accepts meet a form of the previous
read/write/update forms, or is a combination of two.

The IR node for atomic capture takes two separate other acc.atomics,
plus a terminator.

This patch implements all of the lowering for these.
---
 clang/include/clang/AST/StmtOpenACC.h         |  49 +-
 clang/lib/AST/StmtOpenACC.cpp                 | 257 +++++++--
 clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp   | 198 ++++---
 .../CIR/CodeGenOpenACC/atomic-capture.cpp     | 508 ++++++++++++++++++
 .../openacc-not-implemented.cpp               |   6 +-
 5 files changed, 899 insertions(+), 119 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenOpenACC/atomic-capture.cpp

diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h
index ae8029797a36e..ad4e2d65771b8 100644
--- a/clang/include/clang/AST/StmtOpenACC.h
+++ b/clang/include/clang/AST/StmtOpenACC.h
@@ -818,14 +818,57 @@ class OpenACCAtomicConstruct final
 
   // A struct to represent a broken-down version of the associated statement,
   // providing the information specified in OpenACC3.3 Section 2.12.
-  struct StmtInfo {
+  struct SingleStmtInfo {
+    // Holds the entire expression for this. In the case of a normal
+    // read/write/update, this should just be the associated statement.  in the
+    // case of an update, this is going to be the sub-expression this
+    // represents.
+    const Expr *WholeExpr;
     const Expr *V;
     const Expr *X;
     // Listed as 'expr' in the standard, this is typically a generic expression
     // as a component.
     const Expr *RefExpr;
-    // TODO: OpenACC: We should expand this as we're implementing the other
-    // atomic construct kinds.
+    static SingleStmtInfo Empty() {
+      return {nullptr, nullptr, nullptr, nullptr};
+    }
+
+    static SingleStmtInfo createRead(const Expr *WholeExpr, const Expr *V,
+                                     const Expr *X) {
+      return {WholeExpr, V, X, /*RefExpr=*/nullptr};
+    }
+    static SingleStmtInfo createWrite(const Expr *WholeExpr, const Expr *X,
+                                      const Expr *RefExpr) {
+      return {WholeExpr, /*V=*/nullptr, X, RefExpr};
+    }
+    static SingleStmtInfo createUpdate(const Expr *WholeExpr, const Expr *X) {
+      return {WholeExpr, /*V=*/nullptr, X, /*RefExpr=*/nullptr};
+    }
+  };
+
+  struct StmtInfo {
+    enum class StmtForm {
+      Read,
+      Write,
+      Update,
+      ReadWrite,
+      ReadUpdate,
+      UpdateRead
+    } Form;
+    SingleStmtInfo First, Second;
+
+    static StmtInfo createUpdateRead(SingleStmtInfo First,
+                                     SingleStmtInfo Second) {
+      return {StmtForm::UpdateRead, First, Second};
+    }
+    static StmtInfo createReadWrite(SingleStmtInfo First,
+                                    SingleStmtInfo Second) {
+      return {StmtForm::ReadWrite, First, Second};
+    }
+    static StmtInfo createReadUpdate(SingleStmtInfo First,
+                                     SingleStmtInfo Second) {
+      return {StmtForm::ReadUpdate, First, Second};
+    }
   };
 
   const StmtInfo getAssociatedStmtInfo() const;
diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp
index 39dfa19002da8..91d1e28582ec8 100644
--- a/clang/lib/AST/StmtOpenACC.cpp
+++ b/clang/lib/AST/StmtOpenACC.cpp
@@ -324,30 +324,207 @@ OpenACCAtomicConstruct *OpenACCAtomicConstruct::Create(
   return Inst;
 }
 
-static std::pair<const Expr *, const Expr *> getBinaryOpArgs(const Expr *Op) {
+static std::optional<std::pair<const Expr *, const Expr *>>
+getBinaryAssignOpArgs(const Expr *Op, bool &isCompoundAssign) {
   if (const auto *BO = dyn_cast<BinaryOperator>(Op)) {
-    assert(BO->isAssignmentOp());
-    return {BO->getLHS(), BO->getRHS()};
+    if (!BO->isAssignmentOp())
+      return std::nullopt;
+    isCompoundAssign = BO->isCompoundAssignmentOp();
+    return std::pair<const Expr *, const Expr *>({BO->getLHS(), BO->getRHS()});
   }
 
-  const auto *OO = cast<CXXOperatorCallExpr>(Op);
-  assert(OO->isAssignmentOp());
-  return {OO->getArg(0), OO->getArg(1)};
+  if (const auto *OO = dyn_cast<CXXOperatorCallExpr>(Op)) {
+    if (!OO->isAssignmentOp())
+      return std::nullopt;
+    isCompoundAssign = OO->getOperator() != OO_Equal;
+    return std::pair<const Expr *, const Expr *>(
+        {OO->getArg(0), OO->getArg(1)});
+  }
+  return std::nullopt;
+}
+static std::optional<std::pair<const Expr *, const Expr *>>
+getBinaryAssignOpArgs(const Expr *Op) {
+  bool isCompoundAssign;
+  return getBinaryAssignOpArgs(Op, isCompoundAssign);
 }
 
-static std::pair<bool, const Expr *> getUnaryOpArgs(const Expr *Op) {
+static std::optional<const Expr *> getUnaryOpArgs(const Expr *Op) {
   if (const auto *UO = dyn_cast<UnaryOperator>(Op))
-    return {true, UO->getSubExpr()};
+    return UO->getSubExpr();
 
   if (const auto *OpCall = dyn_cast<CXXOperatorCallExpr>(Op)) {
     // Post-inc/dec have a second unused argument to differentiate it, so we
     // accept -- or ++ as unary, or any operator call with only 1 arg.
     if (OpCall->getNumArgs() == 1 || OpCall->getOperator() != OO_PlusPlus ||
         OpCall->getOperator() != OO_MinusMinus)
-      return {true, OpCall->getArg(0)};
+      return {OpCall->getArg(0)};
   }
 
-  return {false, nullptr};
+  return std::nullopt;
+}
+
+// Read is of the form `v = x;`, where both sides are scalar L-values. This is a
+// BinaryOperator or CXXOperatorCallExpr.
+static std::optional<OpenACCAtomicConstruct::SingleStmtInfo>
+getReadStmtInfo(const Expr *E, bool ForAtomicComputeSingleStmt = false) {
+  std::optional<std::pair<const Expr *, const Expr *>> BinaryArgs =
+      getBinaryAssignOpArgs(E);
+
+  if (!BinaryArgs)
+    return std::nullopt;
+
+  // We want the L-value for each side, so we ignore implicit casts.
+  auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createRead(
+      E, BinaryArgs->first->IgnoreImpCasts(),
+      BinaryArgs->second->IgnoreImpCasts());
+
+  // The atomic compute single-stmt variant has to do a 'fixup' step for the 'X'
+  // value, since it is dependent on the RHS.  So if we're in that version, we
+  // skip the checks on X.
+  if ((!ForAtomicComputeSingleStmt &&
+       (!Res.X->isLValue() || !Res.X->getType()->isScalarType())) ||
+      !Res.V->isLValue() || !Res.V->getType()->isScalarType())
+    return std::nullopt;
+
+  return Res;
+}
+
+// Write supports only the format 'x = expr', where the expression is scalar
+// type, and 'x' is a scalar l value. As above, this can come in 2 forms;
+// Binary Operator or CXXOperatorCallExpr.
+static std::optional<OpenACCAtomicConstruct::SingleStmtInfo>
+getWriteStmtInfo(const Expr *E) {
+  std::optional<std::pair<const Expr *, const Expr *>> BinaryArgs =
+      getBinaryAssignOpArgs(E);
+  if (!BinaryArgs)
+    return std::nullopt;
+  // We want the L-value for ONLY the X side, so we ignore implicit casts. For
+  // the right side (the expr), we emit it as an r-value so we need to
+  // maintain implicit casts.
+  auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createWrite(
+      E, BinaryArgs->first->IgnoreImpCasts(), BinaryArgs->second);
+
+  if (!Res.X->isLValue() || !Res.X->getType()->isScalarType())
+    return std::nullopt;
+  return Res;
+}
+
+static std::optional<OpenACCAtomicConstruct::SingleStmtInfo>
+getUpdateStmtInfo(const Expr *E) {
+  std::optional<const Expr *> UnaryArgs = getUnaryOpArgs(E);
+  if (UnaryArgs) {
+    auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createUpdate(
+        E, (*UnaryArgs)->IgnoreImpCasts());
+
+    if (!Res.X->isLValue() || !Res.X->getType()->isScalarType())
+      return std::nullopt;
+
+    return Res;
+  }
+
+  bool isRHSCompoundAssign = false;
+  std::optional<std::pair<const Expr *, const Expr *>> BinaryArgs =
+      getBinaryAssignOpArgs(E, isRHSCompoundAssign);
+  if (!BinaryArgs)
+    return std::nullopt;
+
+  auto Res = OpenACCAtomicConstruct::SingleStmtInfo::createUpdate(
+      E, BinaryArgs->first->IgnoreImpCasts());
+
+  if (!Res.X->isLValue() || !Res.X->getType()->isScalarType())
+    return std::nullopt;
+
+  // 'update' has to be either a compound-assignment operation, or
+  // assignment-to-a-binary-op. Return nullopt if these are not the case.
+  // If we are already compound-assign, we're done!
+  if (isRHSCompoundAssign)
+    return Res;
+
+  // else we have to check that we have a binary operator.
+  const Expr *RHS = BinaryArgs->second->IgnoreImpCasts();
+
+  if (isa<BinaryOperator>(RHS))
+    return Res;
+  else if (const auto *OO = dyn_cast<CXXOperatorCallExpr>(RHS)) {
+    if (OO->isInfixBinaryOp())
+      return Res;
+  }
+
+  return std::nullopt;
+}
+
+static OpenACCAtomicConstruct::StmtInfo
+getCaptureStmtInfo(const Stmt *AssocStmt) {
+  if (const auto *CmpdStmt = dyn_cast<CompoundStmt>(AssocStmt)) {
+    // We checked during Sema to ensure we only have 2 statements here, and
+    // that both are expressions, we can look at these to see what the valid
+    // options are.
+    const Expr *Stmt1 = cast<Expr>(*CmpdStmt->body().begin())->IgnoreImpCasts();
+    const Expr *Stmt2 =
+        cast<Expr>(*(CmpdStmt->body().begin() + 1))->IgnoreImpCasts();
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Read =
+        getReadStmtInfo(Stmt1);
+
+    if (Read) {
+      // READ : WRITE
+      // v = x; x = expr
+      // READ : UPDATE
+      // v = x; x binop = expr
+      // v = x; x = x binop expr
+      // v = x; x = expr binop x
+      // v = x; x++
+      // v = x; ++x
+      // v = x; x--
+      // v = x; --x
+      std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Update =
+          getUpdateStmtInfo(Stmt2);
+      if (Update)
+        return OpenACCAtomicConstruct::StmtInfo::createReadUpdate(*Read,
+                                                                  *Update);
+
+      std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Write =
+          getWriteStmtInfo(Stmt2);
+      return OpenACCAtomicConstruct::StmtInfo::createReadWrite(*Read, *Write);
+    }
+    // UPDATE: READ
+    // x binop = expr; v = x
+    // x = x binop expr; v = x
+    // x = expr binop x ; v = x
+    // ++ x; v = x
+    // x++; v = x
+    // --x; v = x
+    // x--; v = x
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Update =
+        getUpdateStmtInfo(Stmt1);
+    Read = getReadStmtInfo(Stmt2);
+
+    return OpenACCAtomicConstruct::StmtInfo::createUpdateRead(*Update, *Read);
+  } else {
+    // All of the possible forms (listed below) that are writable as a single
+    // line are expressed as an update, then as a read.  We should be able to
+    // just run these two in the right order.
+    // UPDATE: READ
+    // v = x++;
+    // v = x--;
+    // v = ++x;
+    // v = --x;
+    // v = x binop=expr
+    // v = x = x binop expr
+    // v = x = expr binop x
+
+    const Expr *E = cast<const Expr>(AssocStmt);
+
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Read =
+        getReadStmtInfo(E, /*ForAtomicComputeSingleStmt=*/true);
+    std::optional<OpenACCAtomicConstruct::SingleStmtInfo> Update =
+        getUpdateStmtInfo(Read->X);
+
+    // Fixup this, since the 'X' for the read is the result after write, but is
+    // the same value as the LHS-most variable of the update(its X).
+    Read->X = Update->X;
+    return OpenACCAtomicConstruct::StmtInfo::createUpdateRead(*Update, *Read);
+  }
+  return {};
 }
 
 const OpenACCAtomicConstruct::StmtInfo
@@ -357,48 +534,28 @@ OpenACCAtomicConstruct::getAssociatedStmtInfo() const {
   // asserts to ensure we don't get off into the weeds.
   assert(getAssociatedStmt() && "invalid associated stmt?");
 
-  const Expr *AssocStmt = cast<const Expr>(getAssociatedStmt());
   switch (AtomicKind) {
-  case OpenACCAtomicKind::Capture:
-    assert(false && "Only 'read'/'write'/'update' have been implemented here");
-    return {};
-  case OpenACCAtomicKind::Read: {
-    // Read only supports the format 'v = x'; where both sides are a scalar
-    // expression. This can come in 2 forms; BinaryOperator or
-    // CXXOperatorCallExpr (rarely).
-    std::pair<const Expr *, const Expr *> BinaryArgs =
-        getBinaryOpArgs(AssocStmt);
-    // We want the L-value for each side, so we ignore implicit casts.
-    return {BinaryArgs.first->IgnoreImpCasts(),
-            BinaryArgs.second->IgnoreImpCasts(), /*expr=*/nullptr};
-  }
-  case OpenACCAtomicKind::Write: {
-    // Write supports only the format 'x = expr', where the expression is scalar
-    // type, and 'x' is a scalar l value. As above, this can come in 2 forms;
-    // Binary Operator or CXXOperatorCallExpr.
-    std::pair<const Expr *, const Expr *> BinaryArgs =
-        getBinaryOpArgs(AssocStmt);
-    // We want the L-value for ONLY the X side, so we ignore implicit casts. For
-    // the right side (the expr), we emit it as an r-value so we need to
-    // maintain implicit casts.
-    return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(),
-            BinaryArgs.second};
-  }
+  case OpenACCAtomicKind::Read:
+    return OpenACCAtomicConstruct::StmtInfo{
+        OpenACCAtomicConstruct::StmtInfo::StmtForm::Read,
+        *getReadStmtInfo(cast<const Expr>(getAssociatedStmt())),
+        OpenACCAtomicConstruct::SingleStmtInfo::Empty()};
+
+  case OpenACCAtomicKind::Write:
+    return OpenACCAtomicConstruct::StmtInfo{
+        OpenACCAtomicConstruct::StmtInfo::StmtForm::Write,
+        *getWriteStmtInfo(cast<const Expr>(getAssociatedStmt())),
+        OpenACCAtomicConstruct::SingleStmtInfo::Empty()};
+
   case OpenACCAtomicKind::None:
-  case OpenACCAtomicKind::Update: {
-    std::pair<bool, const Expr *> UnaryArgs = getUnaryOpArgs(AssocStmt);
-    if (UnaryArgs.first)
-      return {/*v=*/nullptr, UnaryArgs.second->IgnoreImpCasts(),
-              /*expr=*/nullptr};
-
-    std::pair<const Expr *, const Expr *> BinaryArgs =
-        getBinaryOpArgs(AssocStmt);
-    // For binary args, we just store the RHS as an expression (in the
-    // expression slot), since the codegen just wants the whole thing for a
-    // recipe.
-    return {/*v=*/nullptr, BinaryArgs.first->IgnoreImpCasts(),
-            BinaryArgs.second};
-  }
+  case OpenACCAtomicKind::Update:
+    return OpenACCAtomicConstruct::StmtInfo{
+        OpenACCAtomicConstruct::StmtInfo::StmtForm::Update,
+        *getUpdateStmtInfo(cast<const Expr>(getAssociatedStmt())),
+        OpenACCAtomicConstruct::SingleStmtInfo::Empty()};
+
+  case OpenACCAtomicKind::Capture:
+    return getCaptureStmtInfo(getAssociatedStmt());
   }
 
   llvm_unreachable("unknown OpenACC atomic kind");
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
index 9e55bd5b7ae71..e103c66549b4d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
@@ -314,15 +314,80 @@ const VarDecl *getLValueDecl(const Expr *e) {
   return cast<VarDecl>(dre->getDecl());
 }
 
-mlir::LogicalResult
-CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
-  // For now, we are only support 'read'/'write'/'update', so diagnose. We can
-  // switch on the kind later once we implement the 'capture' form.
-  if (s.getAtomicKind() == OpenACCAtomicKind::Capture) {
-    cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct");
-    return mlir::failure();
+static mlir::acc::AtomicReadOp
+emitAtomicRead(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+               mlir::Location start,
+               OpenACCAtomicConstruct::SingleStmtInfo inf) {
+  // Atomic 'read' only permits 'v = x', where v and x are both scalar L
+  // values. The getAssociatedStmtInfo strips off implicit casts, which
+  // includes implicit conversions and L-to-R-Value conversions, so we can
+  // just emit it as an L value.  The Flang implementation has no problem with
+  // different types, so it appears that the dialect can handle the
+  // conversions.
+  mlir::Value v = cgf.emitLValue(inf.V).getPointer();
+  mlir::Value x = cgf.emitLValue(inf.X).getPointer();
+  mlir::Type resTy = cgf.convertType(inf.V->getType());
+  return mlir::acc::AtomicReadOp::create(builder, start, x, v, resTy,
+                                         /*ifCond=*/{});
+}
+
+static mlir::acc::AtomicWriteOp
+emitAtomicWrite(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+                mlir::Location start,
+                OpenACCAtomicConstruct::SingleStmtInfo inf) {
+  mlir::Value x = cgf.emitLValue(inf.X).getPointer();
+  mlir::Value expr = cgf.emitAnyExpr(inf.RefExpr).getValue();
+  return mlir::acc::AtomicWriteOp::create(builder, start, x, expr,
+                                          /*ifCond=*/{});
+}
+
+static std::pair<mlir::LogicalResult, mlir::acc::AtomicUpdateOp>
+emitAtomicUpdate(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
+                 mlir::Location start, mlir::Location end,
+                 OpenACCAtomicConstruct::SingleStmtInfo inf) {
+  mlir::Value x = cgf.emitLValue(inf.X).getPointer();
+  auto op = mlir::acc::AtomicUpdateOp::create(builder, start, x, /*ifCond=*/{});
+
+  mlir::LogicalResult res = mlir::success();
+  {
+    mlir::OpBuilder::InsertionGuard guardCase(builder);
+    mlir::Type argTy = cast<cir::PointerType>(x.getType()).getPointee();
+    std::array<mlir::Type, 1> recipeType{argTy};
+    std::array<mlir::Location, 1> recipeLoc{start};
+    auto *recipeBlock = builder.createBlock(
+        &op.getRegion(), op.getRegion().end(), recipeType, recipeLoc);
+    builder.setInsertionPointToEnd(recipeBlock);
+    // Since we have an initial value that we know is a scalar type, we can
+    // just emit the entire statement here after sneaking-in our 'alloca' in
+    // the right place, then loading out of it. Flang does a lot less work
+    // (probably does its own emitting!), but we have more complicated AST
+    // nodes to worry about, so we can just count on opt to remove the extra
+    // alloca/load/store set.
+    auto alloca = cir::AllocaOp::create(
+        builder, start, x.getType(), argTy, "x_var",
+        cgf.cgm.getSize(
+            cgf.getContext().getTypeAlignInChars(inf.X->getType())));
+
+    alloca.setInitAttr(mlir::UnitAttr::get(&cgf.getMLIRContext()));
+    builder.CIRBaseBuilderTy::createStore(start, recipeBlock->getArgument(0),
+                                          alloca);
+
+    const VarDecl *xval = getLValueDecl(inf.X);
+    CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, xval};
+    cgf.replaceAddrOfLocalVar(
+        xval, Address{alloca, argTy, cgf.getContext().getDeclAlign(xval)});
+
+    res = cgf.emitStmt(inf.WholeExpr, /*useCurrentScope=*/true);
+
+    auto load = cir::LoadOp::create(builder, start, {alloca});
+    mlir::acc::YieldOp::create(builder, end, {load});
   }
 
+  return {res, op};
+}
+
+mlir::LogicalResult
+CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
   // While Atomic is an 'associated statement' construct, it 'steals' the
   // expression it is associated with rather than emitting it inside of it.  So
   // it has custom emit logic.
@@ -331,78 +396,89 @@ CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
   OpenACCAtomicConstruct::StmtInfo inf = s.getAssociatedStmtInfo();
 
   switch (s.getAtomicKind()) {
-  case OpenACCAtomicKind::Capture:
-    llvm_unreachable("Unimplemented atomic construct type, should have "
-                     "diagnosed/returned above");
-    return mlir::failure();
   case OpenACCAtomicKind::Read: {
-
-    // Atomic 'read' only permits 'v = x', where v and x are both scalar L
-    // values. The getAssociatedStmtInfo strips off implicit casts, which
-    // includes implicit conversions and L-to-R-Value conversions, so we can
-    // just emit it as an L value.  The Flang implementation has no problem with
-    // different types, so it appears that the dialect can handle the
-    // conversions.
-    mlir::Value v = emitLValue(inf.V).getPointer();
-    mlir::Value x = emitLValue(inf.X).getPointer();
-    mlir::Type resTy = convertType(inf.V->getType());
-    auto op = mlir::acc::AtomicReadOp::create(builder, start, x, v, resTy,
-                                              /*ifCond=*/{});
+    assert(inf.Form == OpenACCAtomicConstruct::StmtInfo::StmtForm::Read);
+    mlir::acc::AtomicReadOp op =
+        emitAtomicRead(*this, builder, start, inf.First);
     emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(),
                        s.clauses());
     return mlir::success();
   }
   case OpenACCAtomicKind::Write: {
-    mlir::Value x = emitLValue(inf.X).getPointer();
-    mlir::Value expr = emitAnyExpr(inf.RefExpr).getValue();
-    auto op = mlir::acc::AtomicWriteOp::create(builder, start, x, expr,
-                                               /*ifCond=*/{});
+    assert(inf.Form == OpenACCAtomicConstruct::StmtInfo::StmtForm::Write);
+    auto op = emitAtomicWrite(*this, builder, start, inf.First);
     emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(),
                        s.clauses());
     return mlir::success();
   }
   case OpenACCAtomicKind::None:
   case OpenACCAtomicKind::Update: {
-    mlir::Value x = emitLValue(inf.X).getPointer();
-    auto op =
-        mlir::acc::AtomicUpdateOp::create(builder, start, x, /*ifCond=*/{});
+    assert(inf.Form == OpenACCAtomicConstruct::StmtInfo::StmtForm::Update);
+    auto [res, op] = emitAtomicUpdate(*this, builder, start, end, inf.First);
+    emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(),
+                       s.clauses());
+    return res;
+  }
+  case OpenACCAtomicKind::Capture: {
+    // Atomic-capture is made up of two statements, either an update = read,
+    // read + update, or read + write.  As a result, the IR represents the
+    // capture region as having those two 'inside' of it.
+    auto op = mlir::acc::AtomicCaptureOp::create(builder, start, /*ifCond=*/{});
     emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(),
                        s.clauses());
     mlir::LogicalResult res = mlir::success();
     {
       mlir::OpBuilder::InsertionGuard guardCase(builder);
-      mlir::Type argTy = cast<cir::PointerType>(x.getType()).getPointee();
-      std::array<mlir::Type, 1> recipeType{argTy};
-      std::array<mlir::Location, 1> recipeLoc{start};
-      mlir::Block *recipeBlock = builder.createBlock(
-          &op.getRegion(), op.getRegion().end(), recipeType, recipeLoc);
-      builder.setInsertionPointToEnd(recipeBlock);
-
-      // Since we have an initial value that we know is a scalar type, we can
-      // just emit the entire statement here after sneaking-in our 'alloca' in
-      // the right place, then loading out of it. Flang does a lot less work
-      // (probably does its own emitting!), but we have more complicated AST
-      // nodes to worry about, so we can just count on opt to remove the extra
-      // alloca/load/store set.
-      auto alloca = cir::AllocaOp::create(
-          builder, start, x.getType(), argTy, "x_var",
-          cgm.getSize(getContext().getTypeAlignInChars(inf.X->getType())));
-
-      alloca.setInitAttr(mlir::UnitAttr::get(&getMLIRContext()));
-      builder.CIRBaseBuilderTy::createStore(start, recipeBlock->getArgument(0),
-                                            alloca);
-
-      const VarDecl *xval = getLValueDecl(inf.X);
-      CIRGenFunction::DeclMapRevertingRAII declMapRAII{*this, xval};
-      replaceAddrOfLocalVar(
-          xval, Address{alloca, argTy, getContext().getDeclAlign(xval)});
-
-      res = emitStmt(s.getAssociatedStmt(), /*useCurrentScope=*/true);
-
-      auto load = cir::LoadOp::create(builder, start, {alloca});
-      mlir::acc::YieldOp::create(builder, end, {load});
-    }
 
+      mlir::Block *block =
+          builder.createBlock(&op.getRegion(), op.getRegion().end(), {}, {});
+
+      builder.setInsertionPointToStart(block);
+
+      auto terminator = mlir::acc::TerminatorOp::create(builder, end);
+
+      // The AtomicCaptureOp only permits the two acc.atomic.* operations inside
+      // of it, so all other parts of the expression need to be emitted before
+      // the AtomicCaptureOp, then moved into place.
+      builder.setInsertionPoint(op);
+
+      switch (inf.Form) {
+      default:
+        llvm_unreachable("invalid form for Capture");
+      case OpenACCAtomicConstruct::StmtInfo::StmtForm::ReadWrite: {
+        mlir::acc::AtomicReadOp first =
+            emitAtomicRead(*this, builder, start, inf.First);
+        mlir::acc::AtomicWriteOp second =
+            emitAtomicWrite(*this, builder, start, inf.Second);
+
+        first->moveBefore(terminator);
+        second->moveBefore(terminator);
+        break;
+      }
+      case OpenACCAtomicConstruct::StmtInfo::StmtForm::ReadUpdate: {
+        mlir::acc::AtomicReadOp first =
+            emitAtomicRead(*this, builder, start, inf.First);
+        auto [this_res, second] =
+            emitAtomicUpdate(*this, builder, start, end, inf.Second);
+        res = this_res;
+
+        first->moveBefore(terminator);
+        second->moveBefore(terminator);
+        break;
+      }
+      case OpenACCAtomicConstruct::StmtInfo::StmtForm::UpdateRead: {
+        auto [this_res, first] =
+            emitAtomicUpdate(*this, builder, start, end, inf.First);
+        res = this_res;
+        mlir::acc::AtomicReadOp second =
+            emitAtomicRead(*this, builder, start, inf.Second);
+
+        first->moveBefore(terminator);
+        second->moveBefore(terminator);
+        break;
+      }
+      }
+    }
     return res;
   }
   }
diff --git a/clang/test/CIR/CodeGenOpenACC/atomic-capture.cpp b/clang/test/CIR/CodeGenOpenACC/atomic-capture.cpp
new file mode 100644
index 0000000000000..5f9a43fbc0f05
--- /dev/null
+++ b/clang/test/CIR/CodeGenOpenACC/atomic-capture.cpp
@@ -0,0 +1,508 @@
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+
+struct HasOps {
+  operator float();
+  int thing();
+  int operator++();
+  int operator++(int);
+};
+
+void use(int x, int v, float f, HasOps ops) {
+  // CHECK: cir.func{{.*}}(%[[X_ARG:.*]]: !s32i{{.*}}, %[[V_ARG:.*]]: !s32i{{.*}}, %[[F_ARG:.*]]: !cir.float{{.*}}){{.*}}, %[[OPS_ARG:.*]]: !rec_HasOps{{.*}}) {
+  // CHECK-NEXT: %[[X_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+  // CHECK-NEXT: %[[V_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["v", init]
+  // CHECK-NEXT: %[[F_ALLOCA:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["f", init]
+  // CHECK-NEXT: %[[OPS_ALLOCA:.*]] = cir.alloca !rec_HasOps, !cir.ptr<!rec_HasOps>, ["ops", init]
+  // CHECK-NEXT: cir.store %[[X_ARG]], %[[X_ALLOCA]] : !s32i, !cir.ptr<!s32i>
+  // CHECK-NEXT: cir.store %[[V_ARG]], %[[V_ALLOCA]] : !s32i, !cir.ptr<!s32i>
+  // CHECK-NEXT: cir.store %[[F_ARG]], %[[F_ALLOCA]] : !cir.float, !cir.ptr<!cir.float>
+  // CHECK-NEXT: cir.store %[[OPS_ARG]], %[[OPS_ALLOCA]] : !rec_HasOps, !cir.ptr<!rec_HasOps>
+
+  // CHECK-NEXT: %[[X_LOAD:.*]] = cir.load{{.*}} %[[X_ALLOCA]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[V_LOAD:.*]] = cir.load{{.*}} %[[V_ALLOCA]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[X_LOAD]], %[[V_LOAD]]) : !s32i, !cir.bool
+  // CHECK-NEXT: %[[IF_COND_CAST:.*]] = builtin.unrealized_conversion_cast %[[CMP:.*]] : !cir.bool to i1
+  // CHECK-NEXT: acc.atomic.capture if(%[[IF_COND_CAST]]) {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture if (x != v)
+  v = x++;
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  v = ++x;
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  v = x--;
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  // 
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  v = --x;
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[ONE_INT:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.cast int_to_float %[[ONE_INT]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[F_LOAD]], %[[ONE_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[X_CAST]], %[[MUL]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[ADD]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  v = x += f * 1;
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[ONE_INT:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.cast int_to_float %[[ONE_INT]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[ONE_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[X_CAST]], %[[ADD]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[MUL]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  v = x = x * (f + 1);
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[ONE_INT:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.cast int_to_float %[[ONE_INT]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[ONE_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[ADD]], %[[X_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[MUL]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  v = x = (f + 1) * x;
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[ONE_INT:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.cast int_to_float %[[ONE_INT]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[ONE_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[MUL:.*]] = cir.binop(mul, %[[X_CAST]], %[[ADD]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[MUL]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x; x *= f + 1;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[ONE_INT:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.cast int_to_float %[[ONE_INT]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[ONE_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[SUB:.*]] = cir.binop(sub, %[[X_CAST]], %[[ADD]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[SUB]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    x -= f + 1;
+    v = x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[ONE_INT:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.cast int_to_float %[[ONE_INT]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[ONE_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[DIV:.*]] = cir.binop(div, %[[X_CAST]], %[[ADD]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[DIV]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x;
+    x = x / (f + 1);
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[OPS_CONV:.*]] = cir.call @{{.*}}(%[[OPS_ALLOCA]]) : (!cir.ptr<!rec_HasOps>) -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[OPS_CONV]]) : !cir.float
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[DIV:.*]] = cir.binop(div, %[[ADD]], %[[X_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[DIV]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x;
+    x = (f + ops) / x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[ONE_INT:.*]] = cir.const #cir.int<1> : !s32i
+  // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.cast int_to_float %[[ONE_INT]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[ONE_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[DIV:.*]] = cir.binop(div, %[[X_CAST]], %[[ADD]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[DIV]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    x = x / (f + 1);
+    v = x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[F_LOAD:.*]] = cir.load{{.*}} %[[F_ALLOCA]] : !cir.ptr<!cir.float>, !cir.float
+  // CHECK-NEXT: %[[OPS_CONV:.*]] = cir.call @{{.*}}(%[[OPS_ALLOCA]]) : (!cir.ptr<!rec_HasOps>) -> !cir.float
+  // CHECK-NEXT: %[[ADD:.*]] = cir.binop(add, %[[F_LOAD]], %[[OPS_CONV]]) : !cir.float
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[X_CAST:.*]] = cir.cast int_to_float %[[X_VAR_LOAD]] : !s32i -> !cir.float
+  // CHECK-NEXT: %[[DIV:.*]] = cir.binop(div, %[[ADD]], %[[X_CAST]]) : !cir.float
+  // CHECK-NEXT: %[[INT_CAST:.*]] = cir.cast float_to_int %[[DIV]] : !cir.float -> !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INT_CAST]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    x = (f + ops) / x;
+    v = x;
+  }
+
+  // CHECK-NEXT: %[[OPS_CONV:.*]] = cir.call @{{.*}}(%[[OPS_ALLOCA]]) : (!cir.ptr<!rec_HasOps>) -> !cir.float
+  // CHECK-NEXT: %[[OPS_CONV_TO_INT:.*]] = cir.cast float_to_int %[[OPS_CONV]] : !cir.float -> !s32i
+  //
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.write %[[X_ALLOCA]] = %[[OPS_CONV_TO_INT]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x;
+    x = ops;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x;
+    x++;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x;
+    ++x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    x++;
+    v = x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[INC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    ++x;
+    v = x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[DEC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x;
+    x--;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[DEC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    v = x;
+    --x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[DEC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    x--;
+    v = x;
+  }
+
+  // CHECK-NEXT: acc.atomic.capture {
+  // CHECK-NEXT: acc.atomic.update %[[X_ALLOCA]] : !cir.ptr<!s32i> {
+  // CHECK-NEXT: ^bb0(%[[X_VAR:.*]]: !s32i{{.*}}):
+  // CHECK-NEXT: %[[X_VAR_ALLOC:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x_var", init]
+  // CHECK-NEXT: cir.store %[[X_VAR]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: %[[DEC:.*]] = cir.unary(dec, %[[X_VAR_LOAD]]) nsw : !s32i, !s32i
+  // CHECK-NEXT: cir.store{{.*}} %[[DEC]], %[[X_VAR_ALLOC]] : !s32i, !cir.ptr<!s32i>
+  //
+  // CHECK-NEXT: %[[X_VAR_LOAD:.*]] = cir.load{{.*}} %[[X_VAR_ALLOC]] : !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: acc.yield %[[X_VAR_LOAD]] : !s32i
+  // CHECK-NEXT: }
+  // CHECK-NEXT: acc.atomic.read %[[V_ALLOCA]] = %[[X_ALLOCA]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !s32i
+  // CHECK-NEXT: }
+#pragma acc atomic capture
+  {
+    --x;
+    v = x;
+  }
+}
diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp
index b4d76e18bf345..e85c26718acb8 100644
--- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp
@@ -1,10 +1,6 @@
 // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -fopenacc -fclangir -emit-cir %s -o %t.cir -verify
 
-void HelloWorld(int *A, int *B, int *C, int N) {
-
-// expected-error at +1{{ClangIR code gen Not Yet Implemented: OpenACC Atomic Construct}}
-#pragma acc atomic capture
-  B = A += ++N;
+void HelloWorld(int *A) {
 
 // expected-error at +1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}}
 #pragma acc declare create(A)