[clang] [Clang] Add support for scoped atomic thread fence (PR #115545)

Joseph Huber via cfe-commits cfe-commits at lists.llvm.org
Wed Nov 13 13:12:28 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/115545

>From be73e1600846f6026c03d2e3107b4237f54c51ac Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 8 Nov 2024 15:42:04 -0600
Subject: [PATCH 1/5] [Clang] Add support for scoped atomic thread fence

Summary:
Previously we added support for all of the atomic GNU extensions with
optional memory scopes except for `__atomic_thread_fence`. This patch
adds support for that. This should ideally allow us to generically emit
these LLVM scopes.
---
 clang/include/clang/Basic/Builtins.td |   6 +
 clang/lib/CodeGen/CGBuiltin.cpp       | 129 +++++++++++++++++++
 clang/test/CodeGen/scoped-fence-ops.c | 179 ++++++++++++++++++++++++++
 3 files changed, 314 insertions(+)
 create mode 100644 clang/test/CodeGen/scoped-fence-ops.c

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index e866605ac05c09..46f6c02dfa56ab 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1995,6 +1995,12 @@ def AtomicThreadFence : Builtin {
   let Prototype = "void(int)";
 }
 
+def ScopedAtomicThreadFence : Builtin {
+  let Spellings = ["__scoped_atomic_thread_fence"];
+  let Attributes = [NoThrow];
+  let Prototype = "void(int, int)";
+}
+
 def AtomicSignalFence : Builtin {
   let Spellings = ["__atomic_signal_fence"];
   let Attributes = [NoThrow];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9e0c0bff0125c0..0194848e0d6194 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5174,6 +5174,135 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Builder.SetInsertPoint(ContBB);
     return RValue::get(nullptr);
   }
+  case Builtin::BI__scoped_atomic_thread_fence: {
+    auto ScopeModel = AtomicScopeModel::create(AtomicScopeModelKind::Generic);
+
+    Value *Order = EmitScalarExpr(E->getArg(0));
+    Value *Scope = EmitScalarExpr(E->getArg(1));
+    if (isa<llvm::ConstantInt>(Order) && isa<llvm::ConstantInt>(Scope)) {
+      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+      int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+      SyncScope SS = ScopeModel->isValid(Scp)
+                         ? ScopeModel->map(Scp)
+                         : ScopeModel->map(ScopeModel->getFallBackValue());
+      switch (Ord) {
+      case 0:  // memory_order_relaxed
+      default: // invalid order
+        break;
+      case 1: // memory_order_consume
+      case 2: // memory_order_acquire
+        Builder.CreateFence(
+            llvm::AtomicOrdering::Acquire,
+            getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
+                                                llvm::AtomicOrdering::Acquire,
+                                                getLLVMContext()));
+        break;
+      case 3: // memory_order_release
+        Builder.CreateFence(
+            llvm::AtomicOrdering::Release,
+            getTargetHooks().getLLVMSyncScopeID(getLangOpts(), SS,
+                                                llvm::AtomicOrdering::Release,
+                                                getLLVMContext()));
+        break;
+      case 4: // memory_order_acq_rel
+        Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS,
+                                llvm::AtomicOrdering::AcquireRelease,
+                                getLLVMContext()));
+        break;
+      case 5: // memory_order_seq_cst
+        Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS,
+                                llvm::AtomicOrdering::SequentiallyConsistent,
+                                getLLVMContext()));
+        break;
+      }
+      return RValue::get(nullptr);
+    }
+
+    llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
+
+    llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs;
+    if (isa<llvm::ConstantInt>(Order)) {
+      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+      switch (Ord) {
+      case 0:  // memory_order_relaxed
+      default: // invalid order
+        ContBB->eraseFromParent();
+        return RValue::get(nullptr);
+      case 1: // memory_order_consume
+      case 2: // memory_order_acquire
+        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Acquire;
+        break;
+      case 3: // memory_order_release
+        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Release;
+        break;
+      case 4: // memory_order_acq_rel
+        OrderBBs[Builder.GetInsertBlock()] =
+            llvm::AtomicOrdering::AcquireRelease;
+        break;
+      case 5: // memory_order_seq_cst
+        OrderBBs[Builder.GetInsertBlock()] =
+            llvm::AtomicOrdering::SequentiallyConsistent;
+        break;
+      }
+    } else {
+      llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
+      AcquireBB = createBasicBlock("acquire", CurFn);
+      ReleaseBB = createBasicBlock("release", CurFn);
+      AcqRelBB = createBasicBlock("acqrel", CurFn);
+      SeqCstBB = createBasicBlock("seqcst", CurFn);
+
+      Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
+      llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
+      SI->addCase(Builder.getInt32(1), AcquireBB);
+      SI->addCase(Builder.getInt32(2), AcquireBB);
+      SI->addCase(Builder.getInt32(3), ReleaseBB);
+      SI->addCase(Builder.getInt32(4), AcqRelBB);
+      SI->addCase(Builder.getInt32(5), SeqCstBB);
+
+      OrderBBs[AcquireBB] = llvm::AtomicOrdering::Acquire;
+      OrderBBs[ReleaseBB] = llvm::AtomicOrdering::Release;
+      OrderBBs[AcqRelBB] = llvm::AtomicOrdering::AcquireRelease;
+      OrderBBs[SeqCstBB] = llvm::AtomicOrdering::SequentiallyConsistent;
+    }
+
+    for (auto &[OrderBB, Ordering] : OrderBBs) {
+      Builder.SetInsertPoint(OrderBB);
+      if (isa<llvm::ConstantInt>(Scope)) {
+        int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+        SyncScope SS = ScopeModel->isValid(Scp)
+                           ? ScopeModel->map(Scp)
+                           : ScopeModel->map(ScopeModel->getFallBackValue());
+        Builder.CreateFence(Ordering,
+                            getTargetHooks().getLLVMSyncScopeID(
+                                getLangOpts(), SS, Ordering, getLLVMContext()));
+        Builder.CreateBr(ContBB);
+      } else {
+        llvm::DenseMap<unsigned, llvm::BasicBlock *> BBs;
+        for (unsigned Scp : ScopeModel->getRuntimeValues())
+          BBs[Scp] = createBasicBlock(getAsString(ScopeModel->map(Scp)), CurFn);
+
+        auto *SC = Builder.CreateIntCast(Scope, Builder.getInt32Ty(), false);
+        llvm::SwitchInst *SI = Builder.CreateSwitch(SC, ContBB);
+        for (unsigned Scp : ScopeModel->getRuntimeValues()) {
+          auto *B = BBs[Scp];
+          SI->addCase(Builder.getInt32(Scp), B);
+
+          Builder.SetInsertPoint(B);
+          Builder.CreateFence(Ordering, getTargetHooks().getLLVMSyncScopeID(
+                                            getLangOpts(), ScopeModel->map(Scp),
+                                            Ordering, getLLVMContext()));
+          Builder.CreateBr(ContBB);
+        }
+      }
+    }
+
+    Builder.SetInsertPoint(ContBB);
+    return RValue::get(nullptr);
+  }
 
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:
diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c
new file mode 100644
index 00000000000000..cb48176d37c852
--- /dev/null
+++ b/clang/test/CodeGen/scoped-fence-ops.c
@@ -0,0 +1,179 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
+// RUN:   -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+//: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
+//:   -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1a(
+// SPIRV-SAME: ) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    fence syncscope("workgroup") release
+// SPIRV-NEXT:    ret void
+// AMDGCN-LABEL: define hidden void @fe1a(
+// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT:    ret void
+//
+void fe1a() {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1b(
+// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    [[ORD_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    store i32 [[ORD]], ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT:      i32 1, label %[[ACQUIRE:.*]]
+// SPIRV-NEXT:      i32 2, label %[[ACQUIRE]]
+// SPIRV-NEXT:      i32 3, label %[[RELEASE:.*]]
+// SPIRV-NEXT:      i32 4, label %[[ACQREL:.*]]
+// SPIRV-NEXT:      i32 5, label %[[SEQCST:.*]]
+// SPIRV-NEXT:    ]
+// SPIRV:       [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT:    ret void
+// SPIRV:       [[ACQUIRE]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") acquire
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[RELEASE]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[ACQREL]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") acq_rel
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[SEQCST]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") seq_cst
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1b(
+// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT:    [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
+// AMDGCN-NEXT:    store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT:      i32 1, label %[[ACQUIRE:.*]]
+// AMDGCN-NEXT:      i32 2, label %[[ACQUIRE]]
+// AMDGCN-NEXT:      i32 3, label %[[RELEASE:.*]]
+// AMDGCN-NEXT:      i32 4, label %[[ACQREL:.*]]
+// AMDGCN-NEXT:      i32 5, label %[[SEQCST:.*]]
+// AMDGCN-NEXT:    ]
+// AMDGCN:       [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT:    ret void
+// AMDGCN:       [[ACQUIRE]]:
+// AMDGCN-NEXT:    fence syncscope("workgroup-one-as") acquire
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN:       [[RELEASE]]:
+// AMDGCN-NEXT:    fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN:       [[ACQREL]]:
+// AMDGCN-NEXT:    fence syncscope("workgroup-one-as") acq_rel
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN:       [[SEQCST]]:
+// AMDGCN-NEXT:    fence syncscope("workgroup") seq_cst
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1b(int ord) {
+  __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe1c(
+// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    [[SCOPE_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT:      i32 1, label %[[DEVICE_SCOPE:.*]]
+// SPIRV-NEXT:      i32 0, label %[[SYSTEM_SCOPE:.*]]
+// SPIRV-NEXT:      i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// SPIRV-NEXT:      i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// SPIRV-NEXT:      i32 4, label %[[SINGLE_SCOPE:.*]]
+// SPIRV-NEXT:    ]
+// SPIRV:       [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT:    ret void
+// SPIRV:       [[DEVICE_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("device") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[SYSTEM_SCOPE]]:
+// SPIRV-NEXT:    fence release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[WORKGROUP_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[WAVEFRONT_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("subgroup") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[SINGLE_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("singlethread") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN-LABEL: define hidden void @fe1c(
+// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT:    [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
+// AMDGCN-NEXT:    store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
+// AMDGCN-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// AMDGCN-NEXT:      i32 1, label %[[DEVICE_SCOPE:.*]]
+// AMDGCN-NEXT:      i32 0, label %[[SYSTEM_SCOPE:.*]]
+// AMDGCN-NEXT:      i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// AMDGCN-NEXT:      i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// AMDGCN-NEXT:      i32 4, label %[[SINGLE_SCOPE:.*]]
+// AMDGCN-NEXT:    ]
+// AMDGCN:       [[ATOMIC_SCOPE_CONTINUE]]:
+// AMDGCN-NEXT:    ret void
+// AMDGCN:       [[DEVICE_SCOPE]]:
+// AMDGCN-NEXT:    fence syncscope("agent-one-as") release
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN:       [[SYSTEM_SCOPE]]:
+// AMDGCN-NEXT:    fence syncscope("one-as") release
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN:       [[WORKGROUP_SCOPE]]:
+// AMDGCN-NEXT:    fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN:       [[WAVEFRONT_SCOPE]]:
+// AMDGCN-NEXT:    fence syncscope("wavefront-one-as") release
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// AMDGCN:       [[SINGLE_SCOPE]]:
+// AMDGCN-NEXT:    fence syncscope("singlethread-one-as") release
+// AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1c(int scope) {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, scope);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2a(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    ret void
+// AMDGCN-LABEL: define hidden void @fe2a(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    ret void
+//
+void fe2a() {
+  __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM);
+}
+
+//
+// SPIRV-LABEL: define hidden spir_func void @fe2b(
+// SPIRV-SAME: ) #[[ATTR0]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    fence release
+// SPIRV-NEXT:    ret void
+// AMDGCN-LABEL: define hidden void @fe2b(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    fence syncscope("one-as") release
+// AMDGCN-NEXT:    ret void
+//
+void fe2b() {
+  __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999);
+}

>From a219be99ba94f77992a84e3848978e0a97f8d0ca Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 8 Nov 2024 16:02:29 -0600
Subject: [PATCH 2/5] x86 test

---
 clang/test/CodeGen/scoped-fence-ops.c | 216 ++++++++++++++++++--------
 1 file changed, 147 insertions(+), 69 deletions(-)

diff --git a/clang/test/CodeGen/scoped-fence-ops.c b/clang/test/CodeGen/scoped-fence-ops.c
index cb48176d37c852..376cb11e84d3da 100644
--- a/clang/test/CodeGen/scoped-fence-ops.c
+++ b/clang/test/CodeGen/scoped-fence-ops.c
@@ -1,53 +1,33 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
 // RUN:   -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
-//: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
-//:   -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
+// RUN:   -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \
+// RUN:   -fvisibility=hidden | FileCheck --check-prefix=X86_64 %s
 
+// AMDGCN-LABEL: define hidden void @fe1a(
+// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    fence syncscope("workgroup-one-as") release
+// AMDGCN-NEXT:    ret void
 //
 // SPIRV-LABEL: define hidden spir_func void @fe1a(
 // SPIRV-SAME: ) #[[ATTR0:[0-9]+]] {
 // SPIRV-NEXT:  [[ENTRY:.*:]]
 // SPIRV-NEXT:    fence syncscope("workgroup") release
 // SPIRV-NEXT:    ret void
-// AMDGCN-LABEL: define hidden void @fe1a(
-// AMDGCN-SAME: ) #[[ATTR0:[0-9]+]] {
-// AMDGCN-NEXT:  [[ENTRY:.*:]]
-// AMDGCN-NEXT:    fence syncscope("workgroup-one-as") release
-// AMDGCN-NEXT:    ret void
+//
+// X86_64-LABEL: define hidden void @fe1a(
+// X86_64-SAME: ) #[[ATTR0:[0-9]+]] {
+// X86_64-NEXT:  [[ENTRY:.*:]]
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    ret void
 //
 void fe1a() {
   __scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
 }
 
-//
-// SPIRV-LABEL: define hidden spir_func void @fe1b(
-// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
-// SPIRV-NEXT:  [[ENTRY:.*:]]
-// SPIRV-NEXT:    [[ORD_ADDR:%.*]] = alloca i32, align 4
-// SPIRV-NEXT:    store i32 [[ORD]], ptr [[ORD_ADDR]], align 4
-// SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4
-// SPIRV-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// SPIRV-NEXT:      i32 1, label %[[ACQUIRE:.*]]
-// SPIRV-NEXT:      i32 2, label %[[ACQUIRE]]
-// SPIRV-NEXT:      i32 3, label %[[RELEASE:.*]]
-// SPIRV-NEXT:      i32 4, label %[[ACQREL:.*]]
-// SPIRV-NEXT:      i32 5, label %[[SEQCST:.*]]
-// SPIRV-NEXT:    ]
-// SPIRV:       [[ATOMIC_SCOPE_CONTINUE]]:
-// SPIRV-NEXT:    ret void
-// SPIRV:       [[ACQUIRE]]:
-// SPIRV-NEXT:    fence syncscope("workgroup") acquire
-// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
-// SPIRV:       [[RELEASE]]:
-// SPIRV-NEXT:    fence syncscope("workgroup") release
-// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
-// SPIRV:       [[ACQREL]]:
-// SPIRV-NEXT:    fence syncscope("workgroup") acq_rel
-// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
-// SPIRV:       [[SEQCST]]:
-// SPIRV-NEXT:    fence syncscope("workgroup") seq_cst
-// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
 // AMDGCN-LABEL: define hidden void @fe1b(
 // AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
 // AMDGCN-NEXT:  [[ENTRY:.*:]]
@@ -77,41 +57,66 @@ void fe1a() {
 // AMDGCN-NEXT:    fence syncscope("workgroup") seq_cst
 // AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
 //
-void fe1b(int ord) {
-  __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
-}
-
-//
-// SPIRV-LABEL: define hidden spir_func void @fe1c(
-// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// SPIRV-LABEL: define hidden spir_func void @fe1b(
+// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
 // SPIRV-NEXT:  [[ENTRY:.*:]]
-// SPIRV-NEXT:    [[SCOPE_ADDR:%.*]] = alloca i32, align 4
-// SPIRV-NEXT:    store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4
-// SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT:    [[ORD_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    store i32 [[ORD]], ptr [[ORD_ADDR]], align 4
+// SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4
 // SPIRV-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
-// SPIRV-NEXT:      i32 1, label %[[DEVICE_SCOPE:.*]]
-// SPIRV-NEXT:      i32 0, label %[[SYSTEM_SCOPE:.*]]
-// SPIRV-NEXT:      i32 2, label %[[WORKGROUP_SCOPE:.*]]
-// SPIRV-NEXT:      i32 3, label %[[WAVEFRONT_SCOPE:.*]]
-// SPIRV-NEXT:      i32 4, label %[[SINGLE_SCOPE:.*]]
+// SPIRV-NEXT:      i32 1, label %[[ACQUIRE:.*]]
+// SPIRV-NEXT:      i32 2, label %[[ACQUIRE]]
+// SPIRV-NEXT:      i32 3, label %[[RELEASE:.*]]
+// SPIRV-NEXT:      i32 4, label %[[ACQREL:.*]]
+// SPIRV-NEXT:      i32 5, label %[[SEQCST:.*]]
 // SPIRV-NEXT:    ]
 // SPIRV:       [[ATOMIC_SCOPE_CONTINUE]]:
 // SPIRV-NEXT:    ret void
-// SPIRV:       [[DEVICE_SCOPE]]:
-// SPIRV-NEXT:    fence syncscope("device") release
-// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
-// SPIRV:       [[SYSTEM_SCOPE]]:
-// SPIRV-NEXT:    fence release
+// SPIRV:       [[ACQUIRE]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") acquire
 // SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
-// SPIRV:       [[WORKGROUP_SCOPE]]:
+// SPIRV:       [[RELEASE]]:
 // SPIRV-NEXT:    fence syncscope("workgroup") release
 // SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
-// SPIRV:       [[WAVEFRONT_SCOPE]]:
-// SPIRV-NEXT:    fence syncscope("subgroup") release
+// SPIRV:       [[ACQREL]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") acq_rel
 // SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
-// SPIRV:       [[SINGLE_SCOPE]]:
-// SPIRV-NEXT:    fence syncscope("singlethread") release
+// SPIRV:       [[SEQCST]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") seq_cst
 // SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// X86_64-LABEL: define hidden void @fe1b(
+// X86_64-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
+// X86_64-NEXT:  [[ENTRY:.*:]]
+// X86_64-NEXT:    [[ORD_ADDR:%.*]] = alloca i32, align 4
+// X86_64-NEXT:    store i32 [[ORD]], ptr [[ORD_ADDR]], align 4
+// X86_64-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR]], align 4
+// X86_64-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// X86_64-NEXT:      i32 1, label %[[ACQUIRE:.*]]
+// X86_64-NEXT:      i32 2, label %[[ACQUIRE]]
+// X86_64-NEXT:      i32 3, label %[[RELEASE:.*]]
+// X86_64-NEXT:      i32 4, label %[[ACQREL:.*]]
+// X86_64-NEXT:      i32 5, label %[[SEQCST:.*]]
+// X86_64-NEXT:    ]
+// X86_64:       [[ATOMIC_SCOPE_CONTINUE]]:
+// X86_64-NEXT:    ret void
+// X86_64:       [[ACQUIRE]]:
+// X86_64-NEXT:    fence acquire
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// X86_64:       [[RELEASE]]:
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// X86_64:       [[ACQREL]]:
+// X86_64-NEXT:    fence acq_rel
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// X86_64:       [[SEQCST]]:
+// X86_64-NEXT:    fence seq_cst
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+void fe1b(int ord) {
+  __scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
+}
+
 // AMDGCN-LABEL: define hidden void @fe1c(
 // AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
 // AMDGCN-NEXT:  [[ENTRY:.*:]]
@@ -144,35 +149,108 @@ void fe1b(int ord) {
 // AMDGCN-NEXT:    fence syncscope("singlethread-one-as") release
 // AMDGCN-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
 //
+// SPIRV-LABEL: define hidden spir_func void @fe1c(
+// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    [[SCOPE_ADDR:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4
+// SPIRV-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// SPIRV-NEXT:      i32 1, label %[[DEVICE_SCOPE:.*]]
+// SPIRV-NEXT:      i32 0, label %[[SYSTEM_SCOPE:.*]]
+// SPIRV-NEXT:      i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// SPIRV-NEXT:      i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// SPIRV-NEXT:      i32 4, label %[[SINGLE_SCOPE:.*]]
+// SPIRV-NEXT:    ]
+// SPIRV:       [[ATOMIC_SCOPE_CONTINUE]]:
+// SPIRV-NEXT:    ret void
+// SPIRV:       [[DEVICE_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("device") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[SYSTEM_SCOPE]]:
+// SPIRV-NEXT:    fence release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[WORKGROUP_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("workgroup") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[WAVEFRONT_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("subgroup") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// SPIRV:       [[SINGLE_SCOPE]]:
+// SPIRV-NEXT:    fence syncscope("singlethread") release
+// SPIRV-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
+// X86_64-LABEL: define hidden void @fe1c(
+// X86_64-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
+// X86_64-NEXT:  [[ENTRY:.*:]]
+// X86_64-NEXT:    [[SCOPE_ADDR:%.*]] = alloca i32, align 4
+// X86_64-NEXT:    store i32 [[SCOPE]], ptr [[SCOPE_ADDR]], align 4
+// X86_64-NEXT:    [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR]], align 4
+// X86_64-NEXT:    switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
+// X86_64-NEXT:      i32 1, label %[[DEVICE_SCOPE:.*]]
+// X86_64-NEXT:      i32 0, label %[[SYSTEM_SCOPE:.*]]
+// X86_64-NEXT:      i32 2, label %[[WORKGROUP_SCOPE:.*]]
+// X86_64-NEXT:      i32 3, label %[[WAVEFRONT_SCOPE:.*]]
+// X86_64-NEXT:      i32 4, label %[[SINGLE_SCOPE:.*]]
+// X86_64-NEXT:    ]
+// X86_64:       [[ATOMIC_SCOPE_CONTINUE]]:
+// X86_64-NEXT:    ret void
+// X86_64:       [[DEVICE_SCOPE]]:
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// X86_64:       [[SYSTEM_SCOPE]]:
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// X86_64:       [[WORKGROUP_SCOPE]]:
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// X86_64:       [[WAVEFRONT_SCOPE]]:
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+// X86_64:       [[SINGLE_SCOPE]]:
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    br label %[[ATOMIC_SCOPE_CONTINUE]]
+//
 void fe1c(int scope) {
   __scoped_atomic_thread_fence(__ATOMIC_RELEASE, scope);
 }
 
+// AMDGCN-LABEL: define hidden void @fe2a(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    ret void
 //
 // SPIRV-LABEL: define hidden spir_func void @fe2a(
 // SPIRV-SAME: ) #[[ATTR0]] {
 // SPIRV-NEXT:  [[ENTRY:.*:]]
 // SPIRV-NEXT:    ret void
-// AMDGCN-LABEL: define hidden void @fe2a(
-// AMDGCN-SAME: ) #[[ATTR0]] {
-// AMDGCN-NEXT:  [[ENTRY:.*:]]
-// AMDGCN-NEXT:    ret void
+//
+// X86_64-LABEL: define hidden void @fe2a(
+// X86_64-SAME: ) #[[ATTR0]] {
+// X86_64-NEXT:  [[ENTRY:.*:]]
+// X86_64-NEXT:    ret void
 //
 void fe2a() {
   __scoped_atomic_thread_fence(999, __MEMORY_SCOPE_SYSTEM);
 }
 
+// AMDGCN-LABEL: define hidden void @fe2b(
+// AMDGCN-SAME: ) #[[ATTR0]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    fence syncscope("one-as") release
+// AMDGCN-NEXT:    ret void
 //
 // SPIRV-LABEL: define hidden spir_func void @fe2b(
 // SPIRV-SAME: ) #[[ATTR0]] {
 // SPIRV-NEXT:  [[ENTRY:.*:]]
 // SPIRV-NEXT:    fence release
 // SPIRV-NEXT:    ret void
-// AMDGCN-LABEL: define hidden void @fe2b(
-// AMDGCN-SAME: ) #[[ATTR0]] {
-// AMDGCN-NEXT:  [[ENTRY:.*:]]
-// AMDGCN-NEXT:    fence syncscope("one-as") release
-// AMDGCN-NEXT:    ret void
+//
+// X86_64-LABEL: define hidden void @fe2b(
+// X86_64-SAME: ) #[[ATTR0]] {
+// X86_64-NEXT:  [[ENTRY:.*:]]
+// X86_64-NEXT:    fence release
+// X86_64-NEXT:    ret void
 //
 void fe2b() {
   __scoped_atomic_thread_fence(__ATOMIC_RELEASE, 999);

>From 65f5c16f2aa4048aec1b5ed8c4f8578857b93f1c Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 8 Nov 2024 17:05:13 -0600
Subject: [PATCH 3/5] comments

---
 clang/lib/CodeGen/CGBuiltin.cpp | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0194848e0d6194..f74c472a7eac55 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5225,9 +5225,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
 
     llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs;
-    if (isa<llvm::ConstantInt>(Order)) {
-      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
-      switch (Ord) {
+    if (auto Ord = dyn_cast<llvm::ConstantInt>(Order)) {
+      switch (Ord->getZExtValue()) {
       case 0:  // memory_order_relaxed
       default: // invalid order
         ContBB->eraseFromParent();
@@ -5249,11 +5248,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         break;
       }
     } else {
-      llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
-      AcquireBB = createBasicBlock("acquire", CurFn);
-      ReleaseBB = createBasicBlock("release", CurFn);
-      AcqRelBB = createBasicBlock("acqrel", CurFn);
-      SeqCstBB = createBasicBlock("seqcst", CurFn);
+      llvm::BasicBlock *AcquireBB = createBasicBlock("acquire", CurFn);
+      llvm::BasicBlock *ReleaseBB = createBasicBlock("release", CurFn);
+      llvm::BasicBlock *AcqRelBB = createBasicBlock("acqrel", CurFn);
+      llvm::BasicBlock *SeqCstBB = createBasicBlock("seqcst", CurFn);
 
       Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
       llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
@@ -5271,10 +5269,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     for (auto &[OrderBB, Ordering] : OrderBBs) {
       Builder.SetInsertPoint(OrderBB);
-      if (isa<llvm::ConstantInt>(Scope)) {
-        int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
-        SyncScope SS = ScopeModel->isValid(Scp)
-                           ? ScopeModel->map(Scp)
+      if (auto Scp = dyn_cast<llvm::ConstantInt>(Scope)) {
+        SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
+                           ? ScopeModel->map(Scp->getZExtValue())
                            : ScopeModel->map(ScopeModel->getFallBackValue());
         Builder.CreateFence(Ordering,
                             getTargetHooks().getLLVMSyncScopeID(

>From 96960e3366212aa58e24dc22d8e990e1729b2bd0 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 11 Nov 2024 17:30:41 -0600
Subject: [PATCH 4/5] Comments

---
 clang/lib/CodeGen/CGBuiltin.cpp | 37 ++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f74c472a7eac55..609f99487e22f2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5179,13 +5179,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     Value *Order = EmitScalarExpr(E->getArg(0));
     Value *Scope = EmitScalarExpr(E->getArg(1));
-    if (isa<llvm::ConstantInt>(Order) && isa<llvm::ConstantInt>(Scope)) {
-      int Ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
-      int Scp = cast<llvm::ConstantInt>(Scope)->getZExtValue();
-      SyncScope SS = ScopeModel->isValid(Scp)
-                         ? ScopeModel->map(Scp)
+    if (auto Ord = dyn_cast<llvm::ConstantInt>(Order);
+        auto Scp = dyn_cast<llvm::ConstantInt>(Scope)) {
+      SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
+                         ? ScopeModel->map(Scp->getZExtValue())
                          : ScopeModel->map(ScopeModel->getFallBackValue());
-      switch (Ord) {
+      switch (Ord->getZExtValue()) {
       case 0:  // memory_order_relaxed
       default: // invalid order
         break;
@@ -5224,7 +5223,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     llvm::BasicBlock *ContBB = createBasicBlock("atomic.scope.continue", CurFn);
 
-    llvm::DenseMap<llvm::BasicBlock *, llvm::AtomicOrdering> OrderBBs;
+    llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
+        OrderBBs;
     if (auto Ord = dyn_cast<llvm::ConstantInt>(Order)) {
       switch (Ord->getZExtValue()) {
       case 0:  // memory_order_relaxed
@@ -5233,18 +5233,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         return RValue::get(nullptr);
       case 1: // memory_order_consume
       case 2: // memory_order_acquire
-        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Acquire;
+        OrderBBs.emplace_back(Builder.GetInsertBlock(),
+                              llvm::AtomicOrdering::Acquire);
         break;
       case 3: // memory_order_release
-        OrderBBs[Builder.GetInsertBlock()] = llvm::AtomicOrdering::Release;
+        OrderBBs.emplace_back(Builder.GetInsertBlock(),
+                              llvm::AtomicOrdering::Release);
         break;
       case 4: // memory_order_acq_rel
-        OrderBBs[Builder.GetInsertBlock()] =
-            llvm::AtomicOrdering::AcquireRelease;
+        OrderBBs.emplace_back(Builder.GetInsertBlock(),
+                              llvm::AtomicOrdering::AcquireRelease);
         break;
       case 5: // memory_order_seq_cst
-        OrderBBs[Builder.GetInsertBlock()] =
-            llvm::AtomicOrdering::SequentiallyConsistent;
+        OrderBBs.emplace_back(Builder.GetInsertBlock(),
+                              llvm::AtomicOrdering::SequentiallyConsistent);
         break;
       }
     } else {
@@ -5261,10 +5263,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       SI->addCase(Builder.getInt32(4), AcqRelBB);
       SI->addCase(Builder.getInt32(5), SeqCstBB);
 
-      OrderBBs[AcquireBB] = llvm::AtomicOrdering::Acquire;
-      OrderBBs[ReleaseBB] = llvm::AtomicOrdering::Release;
-      OrderBBs[AcqRelBB] = llvm::AtomicOrdering::AcquireRelease;
-      OrderBBs[SeqCstBB] = llvm::AtomicOrdering::SequentiallyConsistent;
+      OrderBBs.emplace_back(AcquireBB, llvm::AtomicOrdering::Acquire);
+      OrderBBs.emplace_back(ReleaseBB, llvm::AtomicOrdering::Release);
+      OrderBBs.emplace_back(AcqRelBB, llvm::AtomicOrdering::AcquireRelease);
+      OrderBBs.emplace_back(SeqCstBB,
+                            llvm::AtomicOrdering::SequentiallyConsistent);
     }
 
     for (auto &[OrderBB, Ordering] : OrderBBs) {

>From 15b35584f6518d7fba124b89fc19ad6282e82703 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 13 Nov 2024 15:12:12 -0600
Subject: [PATCH 5/5] fix

---
 clang/lib/CodeGen/CGBuiltin.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 609f99487e22f2..414ed523497cef 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5179,8 +5179,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     Value *Order = EmitScalarExpr(E->getArg(0));
     Value *Scope = EmitScalarExpr(E->getArg(1));
-    if (auto Ord = dyn_cast<llvm::ConstantInt>(Order);
-        auto Scp = dyn_cast<llvm::ConstantInt>(Scope)) {
+    auto Ord = dyn_cast<llvm::ConstantInt>(Order);
+    auto Scp = dyn_cast<llvm::ConstantInt>(Scope);
+    if (Ord && Scp) {
       SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
                          ? ScopeModel->map(Scp->getZExtValue())
                          : ScopeModel->map(ScopeModel->getFallBackValue());
@@ -5225,7 +5226,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     llvm::SmallVector<std::pair<llvm::BasicBlock *, llvm::AtomicOrdering>>
         OrderBBs;
-    if (auto Ord = dyn_cast<llvm::ConstantInt>(Order)) {
+    if (Ord) {
       switch (Ord->getZExtValue()) {
       case 0:  // memory_order_relaxed
       default: // invalid order
@@ -5272,7 +5273,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
     for (auto &[OrderBB, Ordering] : OrderBBs) {
       Builder.SetInsertPoint(OrderBB);
-      if (auto Scp = dyn_cast<llvm::ConstantInt>(Scope)) {
+      if (Scp) {
         SyncScope SS = ScopeModel->isValid(Scp->getZExtValue())
                            ? ScopeModel->map(Scp->getZExtValue())
                            : ScopeModel->map(ScopeModel->getFallBackValue());



More information about the cfe-commits mailing list