[clang] [CIR] Add codegen for atomic fence builtin with non-const memory order (PR #172455)
Haocong Lu via cfe-commits
cfe-commits at lists.llvm.org
Wed Dec 17 21:38:24 PST 2025
https://github.com/Luhaocong updated https://github.com/llvm/llvm-project/pull/172455
From 1e7e9a6c75e70fad9549a780eccd8fb67d4c9f2b Mon Sep 17 00:00:00 2001
From: Haocong Lu <haocong.lu at witintech.com>
Date: Tue, 16 Dec 2025 16:15:50 +0800
Subject: [PATCH 1/2] [CIR] Support codegen for atomic fence builtin with
non-const memory order
- Support CIR codegen for the following atomic fence builtins when the
  memory order is not a compile-time constant:
  `__atomic_thread_fence`
  `__atomic_signal_fence`
  `__c11_atomic_thread_fence`
  `__c11_atomic_signal_fence`
- Refactor the existing constant memory order handling: the argument
  expression is now evaluated directly as an integer constant from the AST.
- Add test cases covering all memory orders.
---
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 104 ++++-
clang/test/CIR/CodeGen/atomic-thread-fence.c | 424 +++++++++++++++++++
2 files changed, 508 insertions(+), 20 deletions(-)
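
For context, the case enabled by this first patch is a fence whose memory
order is only known at run time. A minimal user-level sketch (illustrative
only, not part of the patch; the function name is made up):

void fence_with_runtime_order(int memorder) {
  /* With a non-constant order, CIR codegen previously reported
     errorNYI("Variable atomic fence ordering"); with this patch it
     emits a cir.switch over `memorder` with one cir.atomic.fence per
     effective memory order. */
  __atomic_thread_fence(memorder);  /* system sync scope */
  __atomic_signal_fence(memorder);  /* single-thread sync scope */
}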
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index b4f02c97f539a..cab81c44f8d77 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -63,25 +63,89 @@ static RValue emitBuiltinBitOp(CIRGenFunction &cgf, const CallExpr *e,
static void emitAtomicFenceOp(CIRGenFunction &cgf, const CallExpr *expr,
cir::SyncScopeKind syncScope) {
CIRGenBuilderTy &builder = cgf.getBuilder();
- mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));
-
- auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>();
-
- if (!constOrdering) {
- // TODO(cir): Emit code to switch on `orderingVal`,
- // and creating the fence op for valid values.
- cgf.cgm.errorNYI("Variable atomic fence ordering");
+ mlir::Location loc = cgf.getLoc(expr->getSourceRange());
+
+  // Convert the memory order specified by the user to the effective one:
+ // Relaxed -> std::nullopt
+ // Consume/Acquire -> Acquire
+ // Release -> Release
+ // AcquireRelease -> AcquireRelease
+ // SequentiallyConsistent -> SequentiallyConsistent
+ auto getEffectiveMemOrder =
+ [](cir::MemOrder oriOrder) -> std::optional<cir::MemOrder> {
+ if (oriOrder == cir::MemOrder::Relaxed)
+ return std::nullopt;
+ else if (oriOrder == cir::MemOrder::Consume ||
+ oriOrder == cir::MemOrder::Acquire)
+ return cir::MemOrder::Acquire;
+ else
+ return oriOrder;
+ };
+
+ // Handle constant memory ordering.
+ Expr::EvalResult eval;
+ if (expr->getArg(0)->EvaluateAsInt(eval, cgf.getContext())) {
+ uint64_t constOrder = eval.Val.getInt().getZExtValue();
+    // Do not emit anything if the constant is invalid.
+ if (!cir::isValidCIRAtomicOrderingCABI(constOrder))
+ return;
+ cir::MemOrder caseOrder = static_cast<cir::MemOrder>(constOrder);
+ if (std::optional<cir::MemOrder> order = getEffectiveMemOrder(caseOrder))
+ cir::AtomicFenceOp::create(
+ builder, loc, order.value(),
+ cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
return;
}
- auto constOrderingAttr = constOrdering.getValueAttr<cir::IntAttr>();
- assert(constOrderingAttr && "Expected integer constant for ordering");
-
- auto ordering = static_cast<cir::MemOrder>(constOrderingAttr.getUInt());
-
- cir::AtomicFenceOp::create(
- builder, cgf.getLoc(expr->getSourceRange()), ordering,
- cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
+  // Otherwise, handle variable memory ordering: emit a `SwitchOp` to convert
+  // the dynamic value into statically known memory orders.
+ mlir::Value varOrder = cgf.emitScalarExpr(expr->getArg(0));
+ cir::SwitchOp::create(
+ builder, loc, varOrder,
+ [&](mlir::OpBuilder &, mlir::Location loc, mlir::OperationState &) {
+ mlir::Block *switchBlock = builder.getBlock();
+
+ auto emitMemOrderCase = [&](llvm::ArrayRef<cir::MemOrder> caseOrders) {
+ if (caseOrders.empty()) {
+ // Creating default case operation
+ mlir::OpBuilder::InsertPoint insertPoint;
+ cir::CaseOp::create(builder, loc, builder.getArrayAttr({}),
+ cir::CaseOpKind::Default, insertPoint);
+ builder.restoreInsertionPoint(insertPoint);
+ } else if (auto actualOrder = getEffectiveMemOrder(caseOrders[0])) {
+ // Creating case operation for effective memory order. If there are
+ // multiple cases in `caseOrders`, the actual order of each case
+          // must be the same; this must be guaranteed by the caller.
+ mlir::OpBuilder::InsertPoint insertPoint;
+ llvm::SmallVector<mlir::Attribute, 2> orderAttrs;
+ for (cir::MemOrder caseOrder : caseOrders)
+ orderAttrs.push_back(cir::IntAttr::get(
+ varOrder.getType(), static_cast<int>(caseOrder)));
+ cir::CaseOp::create(builder, loc, builder.getArrayAttr(orderAttrs),
+ cir::CaseOpKind::Anyof, insertPoint);
+ // Creating atomic fence operation
+ builder.restoreInsertionPoint(insertPoint);
+ cir::AtomicFenceOp::create(
+ builder, loc, actualOrder.value(),
+ cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
+ } else {
+          // Do nothing if unnecessary (!caseOrders.empty() && !actualOrder)
+ return;
+ }
+ builder.createBreak(loc);
+ builder.setInsertionPointToEnd(switchBlock);
+ return;
+ };
+
+ emitMemOrderCase(/*default:*/ {});
+ emitMemOrderCase({cir::MemOrder::Relaxed}); // Not effective
+ emitMemOrderCase({cir::MemOrder::Consume, cir::MemOrder::Acquire});
+ emitMemOrderCase({cir::MemOrder::Release});
+ emitMemOrderCase({cir::MemOrder::AcquireRelease});
+ emitMemOrderCase({cir::MemOrder::SequentiallyConsistent});
+
+ builder.createYield(loc);
+ });
}
namespace {
@@ -1007,16 +1071,16 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__atomic_test_and_set:
case Builtin::BI__atomic_clear:
return errorBuiltinNYI(*this, e, builtinID);
- case Builtin::BI__atomic_thread_fence: {
+ case Builtin::BI__atomic_thread_fence:
+ case Builtin::BI__c11_atomic_thread_fence: {
emitAtomicFenceOp(*this, e, cir::SyncScopeKind::System);
return RValue::get(nullptr);
}
- case Builtin::BI__atomic_signal_fence: {
+ case Builtin::BI__atomic_signal_fence:
+ case Builtin::BI__c11_atomic_signal_fence: {
emitAtomicFenceOp(*this, e, cir::SyncScopeKind::SingleThread);
return RValue::get(nullptr);
}
- case Builtin::BI__c11_atomic_thread_fence:
- case Builtin::BI__c11_atomic_signal_fence:
case Builtin::BI__scoped_atomic_thread_fence:
case Builtin::BI__builtin_signbit:
case Builtin::BI__builtin_signbitf:
diff --git a/clang/test/CIR/CodeGen/atomic-thread-fence.c b/clang/test/CIR/CodeGen/atomic-thread-fence.c
index f28bc6808cbfa..69298fb9cd7b0 100644
--- a/clang/test/CIR/CodeGen/atomic-thread-fence.c
+++ b/clang/test/CIR/CodeGen/atomic-thread-fence.c
@@ -179,3 +179,427 @@ void loadWithSignalFence(DataPtr d) {
// OGCG: %[[DATA_TEMP_LOAD]] = load ptr, ptr %[[DATA_TEMP]], align 8
// OGCG: ret void
}
+
+void const_atomic_thread_fence() {
+ __atomic_thread_fence(__ATOMIC_RELAXED);
+ __atomic_thread_fence(__ATOMIC_CONSUME);
+ __atomic_thread_fence(__ATOMIC_ACQUIRE);
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+ __atomic_thread_fence(__ATOMIC_ACQ_REL);
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+ // CIR-LABEL: const_atomic_thread_fence
+ // CIR: cir.atomic.fence syncscope(system) acquire
+ // CIR: cir.atomic.fence syncscope(system) acquire
+ // CIR: cir.atomic.fence syncscope(system) release
+ // CIR: cir.atomic.fence syncscope(system) acq_rel
+ // CIR: cir.atomic.fence syncscope(system) seq_cst
+
+ // LLVM-LABEL: const_atomic_thread_fence
+ // LLVM: fence acquire
+ // LLVM: fence acquire
+ // LLVM: fence release
+ // LLVM: fence acq_rel
+ // LLVM: fence seq_cst
+
+ // OGCG-LABEL: const_atomic_thread_fence
+ // OGCG: fence acquire
+ // OGCG: fence acquire
+ // OGCG: fence release
+ // OGCG: fence acq_rel
+ // OGCG: fence seq_cst
+}
+
+void const_c11_atomic_thread_fence() {
+ __c11_atomic_thread_fence(__ATOMIC_RELAXED);
+ __c11_atomic_thread_fence(__ATOMIC_CONSUME);
+ __c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
+ __c11_atomic_thread_fence(__ATOMIC_RELEASE);
+ __c11_atomic_thread_fence(__ATOMIC_ACQ_REL);
+ __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+ // CIR-LABEL: const_c11_atomic_thread_fence
+ // CIR: cir.atomic.fence syncscope(system) acquire
+ // CIR: cir.atomic.fence syncscope(system) acquire
+ // CIR: cir.atomic.fence syncscope(system) release
+ // CIR: cir.atomic.fence syncscope(system) acq_rel
+ // CIR: cir.atomic.fence syncscope(system) seq_cst
+
+ // LLVM-LABEL: const_c11_atomic_thread_fence
+ // LLVM: fence acquire
+ // LLVM: fence acquire
+ // LLVM: fence release
+ // LLVM: fence acq_rel
+ // LLVM: fence seq_cst
+
+ // OGCG-LABEL: const_c11_atomic_thread_fence
+ // OGCG: fence acquire
+ // OGCG: fence acquire
+ // OGCG: fence release
+ // OGCG: fence acq_rel
+ // OGCG: fence seq_cst
+}
+
+void const_atomic_signal_fence() {
+ __atomic_signal_fence(__ATOMIC_RELAXED);
+ __atomic_signal_fence(__ATOMIC_CONSUME);
+ __atomic_signal_fence(__ATOMIC_ACQUIRE);
+ __atomic_signal_fence(__ATOMIC_RELEASE);
+ __atomic_signal_fence(__ATOMIC_ACQ_REL);
+ __atomic_signal_fence(__ATOMIC_SEQ_CST);
+ // CIR-LABEL: const_atomic_signal_fence
+ // CIR: cir.atomic.fence syncscope(single_thread) acquire
+ // CIR: cir.atomic.fence syncscope(single_thread) acquire
+ // CIR: cir.atomic.fence syncscope(single_thread) release
+ // CIR: cir.atomic.fence syncscope(single_thread) acq_rel
+ // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
+
+ // LLVM-LABEL: const_atomic_signal_fence
+ // LLVM: fence syncscope("singlethread") acquire
+ // LLVM: fence syncscope("singlethread") acquire
+ // LLVM: fence syncscope("singlethread") release
+ // LLVM: fence syncscope("singlethread") acq_rel
+ // LLVM: fence syncscope("singlethread") seq_cst
+
+ // OGCG-LABEL: const_atomic_signal_fence
+ // OGCG: fence syncscope("singlethread") acquire
+ // OGCG: fence syncscope("singlethread") acquire
+ // OGCG: fence syncscope("singlethread") release
+ // OGCG: fence syncscope("singlethread") acq_rel
+ // OGCG: fence syncscope("singlethread") seq_cst
+}
+
+void const_c11_atomic_signal_fence() {
+ __c11_atomic_signal_fence(__ATOMIC_RELAXED);
+ __c11_atomic_signal_fence(__ATOMIC_CONSUME);
+ __c11_atomic_signal_fence(__ATOMIC_ACQUIRE);
+ __c11_atomic_signal_fence(__ATOMIC_RELEASE);
+ __c11_atomic_signal_fence(__ATOMIC_ACQ_REL);
+ __c11_atomic_signal_fence(__ATOMIC_SEQ_CST);
+ // CIR-LABEL: const_c11_atomic_signal_fence
+ // CIR: cir.atomic.fence syncscope(single_thread) acquire
+ // CIR: cir.atomic.fence syncscope(single_thread) acquire
+ // CIR: cir.atomic.fence syncscope(single_thread) release
+ // CIR: cir.atomic.fence syncscope(single_thread) acq_rel
+ // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
+
+ // LLVM-LABEL: const_c11_atomic_signal_fence
+ // LLVM: fence syncscope("singlethread") acquire
+ // LLVM: fence syncscope("singlethread") acquire
+ // LLVM: fence syncscope("singlethread") release
+ // LLVM: fence syncscope("singlethread") acq_rel
+ // LLVM: fence syncscope("singlethread") seq_cst
+
+ // OGCG-LABEL: const_c11_atomic_signal_fence
+ // OGCG: fence syncscope("singlethread") acquire
+ // OGCG: fence syncscope("singlethread") acquire
+ // OGCG: fence syncscope("singlethread") release
+ // OGCG: fence syncscope("singlethread") acq_rel
+ // OGCG: fence syncscope("singlethread") seq_cst
+}
+
+void variable_atomic_thread_fences(int memorder) {
+ __atomic_thread_fence(memorder);
+ // CIR-LABEL: variable_atomic_thread_fences
+ // CIR: cir.switch
+ // CIR: cir.case(default, []) {
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system) acquire
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<3> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system) release
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<4> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system) acq_rel
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<5> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system)
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.yield
+ // CIR: }
+
+ // LLVM-LABEL: variable_atomic_thread_fences
+ // LLVM: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // LLVM: br label %[[SWITCH_BLK:.+]]
+ // LLVM: [[SWITCH_BLK]]:
+ // LLVM: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // LLVM: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // LLVM: i32 2, label %[[ACQUIRE_BLK]]
+ // LLVM: i32 3, label %[[RELEASE_BLK:.+]]
+ // LLVM: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // LLVM: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // LLVM: ]
+ // LLVM: [[DEFAULT_BLK]]:
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQUIRE_BLK]]:
+ // LLVM: fence acquire
+ // LLVM: br label %{{.+}}
+ // LLVM: [[RELEASE_BLK]]:
+ // LLVM: fence release
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQ_REL_BLK]]:
+ // LLVM: fence acq_rel
+ // LLVM: br label %{{.+}}
+ // LLVM: [[SEQ_CST_BLK]]:
+ // LLVM: fence seq_cst
+ // LLVM: br label %{{.+}}
+
+ // OGCG-LABEL: variable_atomic_thread_fences
+ // OGCG: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // OGCG: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // OGCG: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // OGCG: i32 2, label %[[ACQUIRE_BLK]]
+ // OGCG: i32 3, label %[[RELEASE_BLK:.+]]
+ // OGCG: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // OGCG: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // OGCG: ]
+ // OGCG: [[ACQUIRE_BLK]]:
+ // OGCG: fence acquire
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[RELEASE_BLK]]:
+ // OGCG: fence release
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[ACQ_REL_BLK]]:
+ // OGCG: fence acq_rel
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[SEQ_CST_BLK]]:
+ // OGCG: fence seq_cst
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[DEFAULT_BLK]]:
+ // OGCG: ret void
+}
+
+void variable_c11_atomic_thread_fences(int memorder) {
+ __c11_atomic_thread_fence(memorder);
+ // CIR-LABEL: variable_c11_atomic_thread_fences
+ // CIR: cir.switch
+ // CIR: cir.case(default, []) {
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system) acquire
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<3> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system) release
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<4> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system) acq_rel
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<5> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(system)
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.yield
+ // CIR: }
+
+ // LLVM-LABEL: variable_c11_atomic_thread_fences
+ // LLVM: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // LLVM: br label %[[SWITCH_BLK:.+]]
+ // LLVM: [[SWITCH_BLK]]:
+ // LLVM: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // LLVM: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // LLVM: i32 2, label %[[ACQUIRE_BLK]]
+ // LLVM: i32 3, label %[[RELEASE_BLK:.+]]
+ // LLVM: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // LLVM: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // LLVM: ]
+ // LLVM: [[DEFAULT_BLK]]:
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQUIRE_BLK]]:
+ // LLVM: fence acquire
+ // LLVM: br label %{{.+}}
+ // LLVM: [[RELEASE_BLK]]:
+ // LLVM: fence release
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQ_REL_BLK]]:
+ // LLVM: fence acq_rel
+ // LLVM: br label %{{.+}}
+ // LLVM: [[SEQ_CST_BLK]]:
+ // LLVM: fence seq_cst
+ // LLVM: br label %{{.+}}
+
+ // OGCG-LABEL: variable_c11_atomic_thread_fences
+ // OGCG: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // OGCG: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // OGCG: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // OGCG: i32 2, label %[[ACQUIRE_BLK]]
+ // OGCG: i32 3, label %[[RELEASE_BLK:.+]]
+ // OGCG: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // OGCG: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // OGCG: ]
+ // OGCG: [[ACQUIRE_BLK]]:
+ // OGCG: fence acquire
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[RELEASE_BLK]]:
+ // OGCG: fence release
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[ACQ_REL_BLK]]:
+ // OGCG: fence acq_rel
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[SEQ_CST_BLK]]:
+ // OGCG: fence seq_cst
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[DEFAULT_BLK]]:
+ // OGCG: ret void
+}
+
+void variable_atomic_signal_fences(int memorder) {
+ __atomic_signal_fence(memorder);
+ // CIR-LABEL: variable_atomic_signal_fences
+ // CIR: cir.switch
+ // CIR: cir.case(default, []) {
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread) acquire
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<3> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread) release
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<4> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread) acq_rel
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<5> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread)
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.yield
+ // CIR: }
+
+ // LLVM-LABEL: variable_atomic_signal_fences
+ // LLVM: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // LLVM: br label %[[SWITCH_BLK:.+]]
+ // LLVM: [[SWITCH_BLK]]:
+ // LLVM: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // LLVM: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // LLVM: i32 2, label %[[ACQUIRE_BLK]]
+ // LLVM: i32 3, label %[[RELEASE_BLK:.+]]
+ // LLVM: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // LLVM: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // LLVM: ]
+ // LLVM: [[DEFAULT_BLK]]:
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQUIRE_BLK]]:
+ // LLVM: fence syncscope("singlethread") acquire
+ // LLVM: br label %{{.+}}
+ // LLVM: [[RELEASE_BLK]]:
+ // LLVM: fence syncscope("singlethread") release
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQ_REL_BLK]]:
+ // LLVM: fence syncscope("singlethread") acq_rel
+ // LLVM: br label %{{.+}}
+ // LLVM: [[SEQ_CST_BLK]]:
+ // LLVM: fence syncscope("singlethread") seq_cst
+ // LLVM: br label %{{.+}}
+
+ // OGCG-LABEL: variable_atomic_signal_fences
+ // OGCG: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // OGCG: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // OGCG: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // OGCG: i32 2, label %[[ACQUIRE_BLK]]
+ // OGCG: i32 3, label %[[RELEASE_BLK:.+]]
+ // OGCG: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // OGCG: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // OGCG: ]
+ // OGCG: [[ACQUIRE_BLK]]:
+ // OGCG: fence syncscope("singlethread") acquire
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[RELEASE_BLK]]:
+ // OGCG: fence syncscope("singlethread") release
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[ACQ_REL_BLK]]:
+ // OGCG: fence syncscope("singlethread") acq_rel
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[SEQ_CST_BLK]]:
+ // OGCG: fence syncscope("singlethread") seq_cst
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[DEFAULT_BLK]]:
+ // OGCG: ret void
+}
+
+void variable_c11_atomic_signal_fences(int memorder) {
+ __c11_atomic_signal_fence(memorder);
+ // CIR-LABEL: variable_c11_atomic_signal_fences
+ // CIR: cir.switch
+ // CIR: cir.case(default, []) {
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<1> : !s32i, #cir.int<2> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread) acquire
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<3> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread) release
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<4> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread) acq_rel
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.case(anyof, [#cir.int<5> : !s32i]) {
+ // CIR: cir.atomic.fence syncscope(single_thread)
+ // CIR: cir.break
+ // CIR: }
+ // CIR: cir.yield
+ // CIR: }
+
+ // LLVM-LABEL: variable_c11_atomic_signal_fences
+ // LLVM: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // LLVM: br label %[[SWITCH_BLK:.+]]
+ // LLVM: [[SWITCH_BLK]]:
+ // LLVM: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // LLVM: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // LLVM: i32 2, label %[[ACQUIRE_BLK]]
+ // LLVM: i32 3, label %[[RELEASE_BLK:.+]]
+ // LLVM: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // LLVM: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // LLVM: ]
+ // LLVM: [[DEFAULT_BLK]]:
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQUIRE_BLK]]:
+ // LLVM: fence syncscope("singlethread") acquire
+ // LLVM: br label %{{.+}}
+ // LLVM: [[RELEASE_BLK]]:
+ // LLVM: fence syncscope("singlethread") release
+ // LLVM: br label %{{.+}}
+ // LLVM: [[ACQ_REL_BLK]]:
+ // LLVM: fence syncscope("singlethread") acq_rel
+ // LLVM: br label %{{.+}}
+ // LLVM: [[SEQ_CST_BLK]]:
+ // LLVM: fence syncscope("singlethread") seq_cst
+ // LLVM: br label %{{.+}}
+
+ // OGCG-LABEL: variable_c11_atomic_signal_fences
+ // OGCG: %[[ORDER:.+]] = load i32, ptr %[[PTR:.+]], align 4
+ // OGCG: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
+ // OGCG: i32 1, label %[[ACQUIRE_BLK:.+]]
+ // OGCG: i32 2, label %[[ACQUIRE_BLK]]
+ // OGCG: i32 3, label %[[RELEASE_BLK:.+]]
+ // OGCG: i32 4, label %[[ACQ_REL_BLK:.+]]
+ // OGCG: i32 5, label %[[SEQ_CST_BLK:.+]]
+ // OGCG: ]
+ // OGCG: [[ACQUIRE_BLK]]:
+ // OGCG: fence syncscope("singlethread") acquire
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[RELEASE_BLK]]:
+ // OGCG: fence syncscope("singlethread") release
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[ACQ_REL_BLK]]:
+ // OGCG: fence syncscope("singlethread") acq_rel
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[SEQ_CST_BLK]]:
+ // OGCG: fence syncscope("singlethread") seq_cst
+ // OGCG: br label %[[DEFAULT_BLK]]
+ // OGCG: [[DEFAULT_BLK]]:
+ // OGCG: ret void
+}
From 7a5f3b4942f0dd88e1d4ddeef1d4cc19bdf307a7 Mon Sep 17 00:00:00 2001
From: Haocong Lu <haocong.lu at witintech.com>
Date: Thu, 18 Dec 2025 13:41:51 +0800
Subject: [PATCH 2/2] [CIR] Refactor `emitAtomicExprWithDynamicMemOrder`
---
clang/lib/CIR/CodeGen/CIRGenAtomic.cpp | 169 +++++++++++--------
clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 86 +---------
clang/lib/CIR/CodeGen/CIRGenFunction.h | 3 +
clang/test/CIR/CodeGen/atomic-thread-fence.c | 12 ++
clang/test/CIR/CodeGen/atomic.c | 20 +--
5 files changed, 127 insertions(+), 163 deletions(-)
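
For readers following the lowering: the refactored emitAtomicExprWithMemOrder
emits a switch whose case labels are the C ABI memory order values. For a
thread fence with a dynamic order, the generated control flow is conceptually
equivalent to the following C sketch (illustration only, not code from the
patch):

void thread_fence_switch_sketch(int memorder) {
  switch (memorder) {
  default: /* relaxed or an invalid value: no fence is emitted */
    break;
  case 1:  /* __ATOMIC_CONSUME is treated as acquire */
  case 2:  /* __ATOMIC_ACQUIRE */
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
    break;
  case 3:  /* __ATOMIC_RELEASE */
    __atomic_thread_fence(__ATOMIC_RELEASE);
    break;
  case 4:  /* __ATOMIC_ACQ_REL */
    __atomic_thread_fence(__ATOMIC_ACQ_REL);
    break;
  case 5:  /* __ATOMIC_SEQ_CST */
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
    break;
  }
}

For fence builtins the default case emits no fence (relaxed has no effect),
while for loads, stores, and read-modify-write operations the default case
falls back to memory_order_relaxed, since there is no good way to report an
unsupported order at run time.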
diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
index 0b8cded35fee9..2a3019d693a88 100644
--- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
@@ -709,25 +709,40 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
cgf.cgm.errorNYI(expr->getSourceRange(), "emitAtomicOp: dynamic sync scope");
}
-static bool isMemOrderValid(uint64_t order, bool isStore, bool isLoad) {
- if (!cir::isValidCIRAtomicOrderingCABI(order))
- return false;
- auto memOrder = static_cast<cir::MemOrder>(order);
- if (isStore)
- return memOrder != cir::MemOrder::Consume &&
- memOrder != cir::MemOrder::Acquire &&
- memOrder != cir::MemOrder::AcquireRelease;
- if (isLoad)
- return memOrder != cir::MemOrder::Release &&
- memOrder != cir::MemOrder::AcquireRelease;
- return true;
+static std::optional<cir::MemOrder>
+getEffectiveAtomicMemOrder(cir::MemOrder oriOrder, bool isStore, bool isLoad,
+ bool isFence) {
+  // Some memory orders are invalid or have no effect for certain operations:
+  // {memory_order_relaxed} has no effect for fence operations.
+ // {memory_order_consume, memory_order_acquire} are not valid for write-only
+ // operations.
+ // {memory_order_release} is not valid for read-only operations.
+ // {memory_order_acq_rel} is only valid for read-write operations.
+ if (isStore) {
+ if (oriOrder == cir::MemOrder::Consume ||
+ oriOrder == cir::MemOrder::Acquire ||
+ oriOrder == cir::MemOrder::AcquireRelease)
+ return std::nullopt;
+ } else if (isLoad) {
+ if (oriOrder == cir::MemOrder::Release ||
+ oriOrder == cir::MemOrder::AcquireRelease)
+ return std::nullopt;
+ } else if (isFence) {
+ if (oriOrder == cir::MemOrder::Relaxed)
+ return std::nullopt;
+ }
+  // memory_order_consume is not implemented; it is always treated like
+  // memory_order_acquire.
+ if (oriOrder == cir::MemOrder::Consume)
+ return cir::MemOrder::Acquire;
+ return oriOrder;
}
static void emitAtomicExprWithDynamicMemOrder(
- CIRGenFunction &cgf, mlir::Value order, AtomicExpr *e, Address dest,
- Address ptr, Address val1, Address val2, Expr *isWeakExpr,
- Expr *orderFailExpr, uint64_t size, bool isStore, bool isLoad,
- const std::optional<Expr::EvalResult> &scopeConst, mlir::Value scopeValue) {
+ CIRGenFunction &cgf, mlir::Value order, bool isStore, bool isLoad,
+ bool isFence, llvm::function_ref<void(cir::MemOrder)> emitAtomicOp) {
+ if (!order)
+ return;
// The memory order is not known at compile-time. The atomic operations
// can't handle runtime memory orders; the memory order must be hard coded.
// Generate a "switch" statement that converts a runtime value into a
@@ -738,56 +753,75 @@ static void emitAtomicExprWithDynamicMemOrder(
[&](mlir::OpBuilder &, mlir::Location loc, mlir::OperationState &) {
mlir::Block *switchBlock = builder.getBlock();
- auto emitMemOrderCase = [&](llvm::ArrayRef<cir::MemOrder> caseOrders,
- cir::MemOrder actualOrder) {
- if (caseOrders.empty())
+ auto emitMemOrderCase = [&](llvm::ArrayRef<cir::MemOrder> caseOrders) {
+        // Check that all cases map to the same effective memory order.
+ for (int i = 1, e = caseOrders.size(); i < e; i++)
+ assert((getEffectiveAtomicMemOrder(caseOrders[i - 1], isStore,
+ isLoad, isFence) ==
+ getEffectiveAtomicMemOrder(caseOrders[i], isStore, isLoad,
+ isFence)) &&
+ "Effective memory order must be same!");
+        // Emit the case label and the atomic operation if necessary.
+ if (caseOrders.empty()) {
emitMemOrderDefaultCaseLabel(builder, loc);
- else
+ // There is no good way to report an unsupported memory order at
+ // runtime, hence the fallback to memory_order_relaxed.
+ if (!isFence)
+ emitAtomicOp(cir::MemOrder::Relaxed);
+ } else if (auto actualOrder = getEffectiveAtomicMemOrder(
+ caseOrders[0], isStore, isLoad, isFence)) {
+ // Included in default case.
+ if (!isFence && actualOrder == cir::MemOrder::Relaxed)
+ return;
+ // Creating case operation for effective memory order. If there are
+ // multiple cases in `caseOrders`, the actual order of each case
+            // must be the same; this must be guaranteed by the caller.
emitMemOrderCaseLabel(builder, loc, order.getType(), caseOrders);
- emitAtomicOp(cgf, e, dest, ptr, val1, val2, isWeakExpr, orderFailExpr,
- size, actualOrder, scopeConst, scopeValue);
+ emitAtomicOp(actualOrder.value());
+ } else {
+ // Do nothing if (!caseOrders.empty() && !actualOrder)
+ return;
+ }
builder.createBreak(loc);
builder.setInsertionPointToEnd(switchBlock);
};
- // default:
- // Use memory_order_relaxed for relaxed operations and for any memory
- // order value that is not supported. There is no good way to report
- // an unsupported memory order at runtime, hence the fallback to
- // memory_order_relaxed.
- emitMemOrderCase(/*caseOrders=*/{}, cir::MemOrder::Relaxed);
-
- if (!isStore) {
- // case consume:
- // case acquire:
- // memory_order_consume is not implemented; it is always treated
- // like memory_order_acquire. These memory orders are not valid for
- // write-only operations.
- emitMemOrderCase({cir::MemOrder::Consume, cir::MemOrder::Acquire},
- cir::MemOrder::Acquire);
- }
-
- if (!isLoad) {
- // case release:
- // memory_order_release is not valid for read-only operations.
- emitMemOrderCase({cir::MemOrder::Release}, cir::MemOrder::Release);
- }
-
- if (!isLoad && !isStore) {
- // case acq_rel:
- // memory_order_acq_rel is only valid for read-write operations.
- emitMemOrderCase({cir::MemOrder::AcquireRelease},
- cir::MemOrder::AcquireRelease);
- }
-
- // case seq_cst:
- emitMemOrderCase({cir::MemOrder::SequentiallyConsistent},
- cir::MemOrder::SequentiallyConsistent);
+ emitMemOrderCase(/*default:*/ {});
+ emitMemOrderCase({cir::MemOrder::Relaxed});
+ emitMemOrderCase({cir::MemOrder::Consume, cir::MemOrder::Acquire});
+ emitMemOrderCase({cir::MemOrder::Release});
+ emitMemOrderCase({cir::MemOrder::AcquireRelease});
+ emitMemOrderCase({cir::MemOrder::SequentiallyConsistent});
builder.createYield(loc);
});
}
+void CIRGenFunction::emitAtomicExprWithMemOrder(
+ const Expr *memOrder, bool isStore, bool isLoad, bool isFence,
+ llvm::function_ref<void(cir::MemOrder)> emitAtomicOp) {
+  // Try to evaluate the memory order operand as an integer constant.
+ Expr::EvalResult eval;
+ if (memOrder->EvaluateAsInt(eval, getContext())) {
+ uint64_t constOrder = eval.Val.getInt().getZExtValue();
+ // We should not ever get to a case where the ordering isn't a valid CABI
+ // value, but it's hard to enforce that in general.
+ if (!cir::isValidCIRAtomicOrderingCABI(constOrder))
+ return;
+ cir::MemOrder oriOrder = static_cast<cir::MemOrder>(constOrder);
+ if (auto actualOrder =
+ getEffectiveAtomicMemOrder(oriOrder, isStore, isLoad, isFence))
+ emitAtomicOp(actualOrder.value());
+ return;
+ }
+
+  // Otherwise, handle variable memory ordering: emit a `SwitchOp` to convert
+  // the dynamic value into statically known memory orders.
+ mlir::Value dynOrder = emitScalarExpr(memOrder);
+ emitAtomicExprWithDynamicMemOrder(*this, dynOrder, isStore, isLoad, isFence,
+ emitAtomicOp);
+}
+
RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) {
QualType atomicTy = e->getPtr()->getType()->getPointeeType();
QualType memTy = atomicTy;
@@ -812,12 +846,6 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) {
TypeInfoChars typeInfo = getContext().getTypeInfoInChars(atomicTy);
uint64_t size = typeInfo.Width.getQuantity();
- // Emit the memory order operand, and try to evaluate it as a constant.
- mlir::Value order = emitScalarExpr(e->getOrder());
- std::optional<Expr::EvalResult> orderConst;
- if (Expr::EvalResult eval; e->getOrder()->EvaluateAsInt(eval, getContext()))
- orderConst.emplace(std::move(eval));
-
// Emit the sync scope operand, and try to evaluate it as a constant.
mlir::Value scope =
e->getScopeModel() ? emitScalarExpr(e->getScope()) : nullptr;
@@ -979,19 +1007,12 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) {
e->getOp() == AtomicExpr::AO__scoped_atomic_load ||
e->getOp() == AtomicExpr::AO__scoped_atomic_load_n;
- if (orderConst.has_value()) {
- // We have evaluated the memory order as an integer constant in orderConst.
- // We should not ever get to a case where the ordering isn't a valid CABI
- // value, but it's hard to enforce that in general.
- uint64_t ord = orderConst->Val.getInt().getZExtValue();
- if (isMemOrderValid(ord, isStore, isLoad))
- emitAtomicOp(*this, e, dest, ptr, val1, val2, isWeakExpr, orderFailExpr,
- size, static_cast<cir::MemOrder>(ord), scopeConst, scope);
- } else {
- emitAtomicExprWithDynamicMemOrder(*this, order, e, dest, ptr, val1, val2,
- isWeakExpr, orderFailExpr, size, isStore,
- isLoad, scopeConst, scope);
- }
+ auto emitAtomicOpCallBackFn = [&](cir::MemOrder memOrder) {
+ emitAtomicOp(*this, e, dest, ptr, val1, val2, isWeakExpr, orderFailExpr,
+ size, memOrder, scopeConst, scope);
+ };
+ emitAtomicExprWithMemOrder(e->getOrder(), isStore, isLoad, /*isFence*/ false,
+ emitAtomicOpCallBackFn);
if (resultTy->isVoidType())
return RValue::get(nullptr);
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index cab81c44f8d77..48c561bceda54 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -65,87 +65,15 @@ static void emitAtomicFenceOp(CIRGenFunction &cgf, const CallExpr *expr,
CIRGenBuilderTy &builder = cgf.getBuilder();
mlir::Location loc = cgf.getLoc(expr->getSourceRange());
-  // Convert the memory order specified by the user to the effective one:
- // Relaxed -> std::nullopt
- // Consume/Acquire -> Acquire
- // Release -> Release
- // AcquireRelease -> AcquireRelease
- // SequentiallyConsistent -> SequentiallyConsistent
- auto getEffectiveMemOrder =
- [](cir::MemOrder oriOrder) -> std::optional<cir::MemOrder> {
- if (oriOrder == cir::MemOrder::Relaxed)
- return std::nullopt;
- else if (oriOrder == cir::MemOrder::Consume ||
- oriOrder == cir::MemOrder::Acquire)
- return cir::MemOrder::Acquire;
- else
- return oriOrder;
+ auto emitAtomicOpCallBackFn = [&](cir::MemOrder memOrder) {
+ cir::AtomicFenceOp::create(
+ builder, loc, memOrder,
+ cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
};
- // Handle constant memory ordering.
- Expr::EvalResult eval;
- if (expr->getArg(0)->EvaluateAsInt(eval, cgf.getContext())) {
- uint64_t constOrder = eval.Val.getInt().getZExtValue();
-    // Do not emit anything if the constant is invalid.
- if (!cir::isValidCIRAtomicOrderingCABI(constOrder))
- return;
- cir::MemOrder caseOrder = static_cast<cir::MemOrder>(constOrder);
- if (std::optional<cir::MemOrder> order = getEffectiveMemOrder(caseOrder))
- cir::AtomicFenceOp::create(
- builder, loc, order.value(),
- cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
- return;
- }
-
-  // Otherwise, handle variable memory ordering: emit a `SwitchOp` to convert
-  // the dynamic value into statically known memory orders.
- mlir::Value varOrder = cgf.emitScalarExpr(expr->getArg(0));
- cir::SwitchOp::create(
- builder, loc, varOrder,
- [&](mlir::OpBuilder &, mlir::Location loc, mlir::OperationState &) {
- mlir::Block *switchBlock = builder.getBlock();
-
- auto emitMemOrderCase = [&](llvm::ArrayRef<cir::MemOrder> caseOrders) {
- if (caseOrders.empty()) {
- // Creating default case operation
- mlir::OpBuilder::InsertPoint insertPoint;
- cir::CaseOp::create(builder, loc, builder.getArrayAttr({}),
- cir::CaseOpKind::Default, insertPoint);
- builder.restoreInsertionPoint(insertPoint);
- } else if (auto actualOrder = getEffectiveMemOrder(caseOrders[0])) {
- // Creating case operation for effective memory order. If there are
- // multiple cases in `caseOrders`, the actual order of each case
-          // must be the same; this must be guaranteed by the caller.
- mlir::OpBuilder::InsertPoint insertPoint;
- llvm::SmallVector<mlir::Attribute, 2> orderAttrs;
- for (cir::MemOrder caseOrder : caseOrders)
- orderAttrs.push_back(cir::IntAttr::get(
- varOrder.getType(), static_cast<int>(caseOrder)));
- cir::CaseOp::create(builder, loc, builder.getArrayAttr(orderAttrs),
- cir::CaseOpKind::Anyof, insertPoint);
- // Creating atomic fence operation
- builder.restoreInsertionPoint(insertPoint);
- cir::AtomicFenceOp::create(
- builder, loc, actualOrder.value(),
- cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
- } else {
-          // Do nothing if unnecessary (!caseOrders.empty() && !actualOrder)
- return;
- }
- builder.createBreak(loc);
- builder.setInsertionPointToEnd(switchBlock);
- return;
- };
-
- emitMemOrderCase(/*default:*/ {});
- emitMemOrderCase({cir::MemOrder::Relaxed}); // Not effective
- emitMemOrderCase({cir::MemOrder::Consume, cir::MemOrder::Acquire});
- emitMemOrderCase({cir::MemOrder::Release});
- emitMemOrderCase({cir::MemOrder::AcquireRelease});
- emitMemOrderCase({cir::MemOrder::SequentiallyConsistent});
-
- builder.createYield(loc);
- });
+ cgf.emitAtomicExprWithMemOrder(expr->getArg(0), /*isStore*/ false,
+ /*isLoad*/ false, /*isFence*/ true,
+ emitAtomicOpCallBackFn);
}
namespace {
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index cfe9b37c2c725..4b5f68ac79475 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1349,6 +1349,9 @@ class CIRGenFunction : public CIRGenTypeCache {
void emitAtomicStore(RValue rvalue, LValue dest, bool isInit);
void emitAtomicStore(RValue rvalue, LValue dest, cir::MemOrder order,
bool isVolatile, bool isInit);
+ void emitAtomicExprWithMemOrder(
+ const Expr *memOrder, bool isStore, bool isLoad, bool isFence,
+ llvm::function_ref<void(cir::MemOrder)> emitAtomicOp);
AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d,
mlir::OpBuilder::InsertPoint ip = {});
diff --git a/clang/test/CIR/CodeGen/atomic-thread-fence.c b/clang/test/CIR/CodeGen/atomic-thread-fence.c
index 69298fb9cd7b0..7aac256af562a 100644
--- a/clang/test/CIR/CodeGen/atomic-thread-fence.c
+++ b/clang/test/CIR/CodeGen/atomic-thread-fence.c
@@ -188,6 +188,7 @@ void const_atomic_thread_fence() {
__atomic_thread_fence(__ATOMIC_ACQ_REL);
__atomic_thread_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: const_atomic_thread_fence
+ // CIR-NOT: cir.atomic.fence syncscope(system) relaxed
// CIR: cir.atomic.fence syncscope(system) acquire
// CIR: cir.atomic.fence syncscope(system) acquire
// CIR: cir.atomic.fence syncscope(system) release
@@ -195,6 +196,7 @@ void const_atomic_thread_fence() {
// CIR: cir.atomic.fence syncscope(system) seq_cst
// LLVM-LABEL: const_atomic_thread_fence
+ // LLVM-NOT: fence relaxed
// LLVM: fence acquire
// LLVM: fence acquire
// LLVM: fence release
@@ -202,6 +204,7 @@ void const_atomic_thread_fence() {
// LLVM: fence seq_cst
// OGCG-LABEL: const_atomic_thread_fence
+ // OGCG-NOT: fence relaxed
// OGCG: fence acquire
// OGCG: fence acquire
// OGCG: fence release
@@ -217,6 +220,7 @@ void const_c11_atomic_thread_fence() {
__c11_atomic_thread_fence(__ATOMIC_ACQ_REL);
__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: const_c11_atomic_thread_fence
+ // CIR-NOT: cir.atomic.fence syncscope(system) relaxed
// CIR: cir.atomic.fence syncscope(system) acquire
// CIR: cir.atomic.fence syncscope(system) acquire
// CIR: cir.atomic.fence syncscope(system) release
@@ -224,6 +228,7 @@ void const_c11_atomic_thread_fence() {
// CIR: cir.atomic.fence syncscope(system) seq_cst
// LLVM-LABEL: const_c11_atomic_thread_fence
+ // LLVM-NOT: fence relaxed
// LLVM: fence acquire
// LLVM: fence acquire
// LLVM: fence release
@@ -231,6 +236,7 @@ void const_c11_atomic_thread_fence() {
// LLVM: fence seq_cst
// OGCG-LABEL: const_c11_atomic_thread_fence
+ // OGCG-NOT: fence relaxed
// OGCG: fence acquire
// OGCG: fence acquire
// OGCG: fence release
@@ -246,6 +252,7 @@ void const_atomic_signal_fence() {
__atomic_signal_fence(__ATOMIC_ACQ_REL);
__atomic_signal_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: const_atomic_signal_fence
+ // CIR-NOT: cir.atomic.fence syncscope(single_thread) relaxed
// CIR: cir.atomic.fence syncscope(single_thread) acquire
// CIR: cir.atomic.fence syncscope(single_thread) acquire
// CIR: cir.atomic.fence syncscope(single_thread) release
@@ -253,6 +260,7 @@ void const_atomic_signal_fence() {
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// LLVM-LABEL: const_atomic_signal_fence
+ // LLVM-NOT: fence syncscope("singlethread") relaxed
// LLVM: fence syncscope("singlethread") acquire
// LLVM: fence syncscope("singlethread") acquire
// LLVM: fence syncscope("singlethread") release
@@ -260,6 +268,7 @@ void const_atomic_signal_fence() {
// LLVM: fence syncscope("singlethread") seq_cst
// OGCG-LABEL: const_atomic_signal_fence
+ // OGCG-NOT: fence syncscope("singlethread") relaxed
// OGCG: fence syncscope("singlethread") acquire
// OGCG: fence syncscope("singlethread") acquire
// OGCG: fence syncscope("singlethread") release
@@ -275,6 +284,7 @@ void const_c11_atomic_signal_fence() {
__c11_atomic_signal_fence(__ATOMIC_ACQ_REL);
__c11_atomic_signal_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: const_c11_atomic_signal_fence
+ // CIR-NOT: cir.atomic.fence syncscope(single_thread) relaxed
// CIR: cir.atomic.fence syncscope(single_thread) acquire
// CIR: cir.atomic.fence syncscope(single_thread) acquire
// CIR: cir.atomic.fence syncscope(single_thread) release
@@ -282,6 +292,7 @@ void const_c11_atomic_signal_fence() {
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// LLVM-LABEL: const_c11_atomic_signal_fence
+ // LLVM-NOT: fence syncscope("singlethread") relaxed
// LLVM: fence syncscope("singlethread") acquire
// LLVM: fence syncscope("singlethread") acquire
// LLVM: fence syncscope("singlethread") release
@@ -289,6 +300,7 @@ void const_c11_atomic_signal_fence() {
// LLVM: fence syncscope("singlethread") seq_cst
// OGCG-LABEL: const_c11_atomic_signal_fence
+ // OGCG-NOT: fence syncscope("singlethread") relaxed
// OGCG: fence syncscope("singlethread") acquire
// OGCG: fence syncscope("singlethread") acquire
// OGCG: fence syncscope("singlethread") release
diff --git a/clang/test/CIR/CodeGen/atomic.c b/clang/test/CIR/CodeGen/atomic.c
index 6c85c782d4ad2..4baac3bab7bce 100644
--- a/clang/test/CIR/CodeGen/atomic.c
+++ b/clang/test/CIR/CodeGen/atomic.c
@@ -82,7 +82,7 @@ void load(int *ptr) {
// CIR-LABEL: @load
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(relaxed) %{{.+}} : !cir.ptr<!s32i>, !s32i
-// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(consume) %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(acquire) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(acquire) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(seq_cst) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: }
@@ -111,7 +111,7 @@ void load_n(int *ptr) {
// CIR-LABEL: @load_n
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(relaxed) %{{.+}} : !cir.ptr<!s32i>, !s32i
-// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(consume) %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(acquire) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(acquire) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(seq_cst) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: }
@@ -139,7 +139,7 @@ void c11_load(_Atomic(int) *ptr) {
// CIR-LABEL: @c11_load
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(relaxed) %{{.+}} : !cir.ptr<!s32i>, !s32i
-// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(consume) %{{.+}} : !cir.ptr<!s32i>, !s32i
+// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(acquire) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(acquire) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: %{{.+}} = cir.load align(4) syncscope(system) atomic(seq_cst) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: }
@@ -454,7 +454,7 @@ void c11_atomic_exchange(_Atomic(int) *ptr, int value) {
__c11_atomic_exchange(ptr, value, __ATOMIC_ACQ_REL);
__c11_atomic_exchange(ptr, value, __ATOMIC_SEQ_CST);
// CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
- // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
@@ -487,7 +487,7 @@ void atomic_exchange(int *ptr, int *value, int *old) {
__atomic_exchange(ptr, value, old, __ATOMIC_ACQ_REL);
__atomic_exchange(ptr, value, old, __ATOMIC_SEQ_CST);
// CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
- // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
@@ -520,7 +520,7 @@ void atomic_exchange_n(int *ptr, int value) {
__atomic_exchange_n(ptr, value, __ATOMIC_ACQ_REL);
__atomic_exchange_n(ptr, value, __ATOMIC_SEQ_CST);
// CIR: %{{.+}} = cir.atomic.xchg relaxed %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
- // CIR: %{{.+}} = cir.atomic.xchg consume %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
+ // CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg acquire %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg release %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
// CIR: %{{.+}} = cir.atomic.xchg acq_rel %{{.+}}, %{{.+}} : (!cir.ptr<!s32i>, !s32i) -> !s32i
@@ -1218,7 +1218,7 @@ void atomic_store_dynamic_order(int *ptr, int order) {
__atomic_store_n(ptr, 10, order);
// CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
- // CIR-NEXT: %[[ORDER:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
+ // CIR: %[[ORDER:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: cir.switch(%[[ORDER]] : !s32i) {
// CIR-NEXT: cir.case(default, []) {
// CIR-NEXT: %[[VALUE:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
@@ -1239,7 +1239,7 @@ void atomic_store_dynamic_order(int *ptr, int order) {
// CIR-NEXT: }
// LLVM: %[[PTR:.+]] = load ptr, ptr %{{.+}}, align 8
- // LLVM-NEXT: %[[ORDER:.+]] = load i32, ptr %{{.+}}, align 4
+ // LLVM: %[[ORDER:.+]] = load i32, ptr %{{.+}}, align 4
// LLVM: br label %[[SWITCH_BLK:.+]]
// LLVM: [[SWITCH_BLK]]:
// LLVM-NEXT: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [
@@ -1287,7 +1287,7 @@ int atomic_load_and_store_dynamic_order(int *ptr, int order) {
return __atomic_exchange_n(ptr, 20, order);
// CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i>
- // CIR-NEXT: %[[ORDER:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
+ // CIR: %[[ORDER:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
// CIR: cir.switch(%[[ORDER]] : !s32i) {
// CIR-NEXT: cir.case(default, []) {
// CIR-NEXT: %[[LIT:.+]] = cir.load align(4) %{{.+}} : !cir.ptr<!s32i>, !s32i
@@ -1324,7 +1324,7 @@ int atomic_load_and_store_dynamic_order(int *ptr, int order) {
// CIR-NEXT: %{{.+}} = cir.load align(4) %[[RES_SLOT]] : !cir.ptr<!s32i>, !s32i
// LLVM: %[[PTR:.+]] = load ptr, ptr %{{.+}}, align 8
- // LLVM-NEXT: %[[ORDER:.+]] = load i32, ptr %{{.+}}, align 4
+ // LLVM: %[[ORDER:.+]] = load i32, ptr %{{.+}}, align 4
// LLVM: br label %[[SWITCH_BLK:.+]]
// LLVM: [[SWITCH_BLK]]:
// LLVM-NEXT: switch i32 %[[ORDER]], label %[[DEFAULT_BLK:.+]] [