[clang] [clang] Add support for cluster sync scope (PR #162575)

via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 8 17:04:02 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: None (macurtis-amd)

<details>
<summary>Changes</summary>



---

Patch is 400.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162575.diff


15 Files Affected:

- (modified) clang/docs/HIPSupport.rst (+2) 
- (modified) clang/docs/LanguageExtensions.rst (+1) 
- (modified) clang/include/clang/Basic/SyncScope.h (+32-10) 
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+15-14) 
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+4) 
- (modified) clang/lib/CodeGen/Targets/SPIR.cpp (+2) 
- (modified) clang/lib/Frontend/InitPreprocessor.cpp (+2) 
- (modified) clang/test/CodeGen/scoped-atomic-ops.c (+4293-190) 
- (modified) clang/test/CodeGen/scoped-fence-ops.c (+28-13) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl (+41-18) 
- (modified) clang/test/Preprocessor/init-aarch64.c (+2) 
- (modified) clang/test/Preprocessor/init-loongarch.c (+12-10) 
- (modified) clang/test/Preprocessor/init.c (+11-6) 
- (modified) clang/test/SemaCUDA/atomic-ops.cu (+7-2) 
- (modified) clang/test/SemaCUDA/spirv-amdgcn-atomic-ops.cu (+7-2) 


``````````diff
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index b4a671e3cfa3c..ec2af2a6f569d 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -164,6 +164,8 @@ Predefined Macros
      - Represents wavefront memory scope in HIP (value is 2).
    * - ``__HIP_MEMORY_SCOPE_WORKGROUP``
      - Represents workgroup memory scope in HIP (value is 3).
+   * - ``__HIP_MEMORY_SCOPE_CLUSTER``
+     - Represents cluster memory scope in HIP (value is 6).
    * - ``__HIP_MEMORY_SCOPE_AGENT``
      - Represents agent memory scope in HIP (value is 4).
    * - ``__HIP_MEMORY_SCOPE_SYSTEM``
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 6bb99c757cd19..bef6e9c14b182 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4846,6 +4846,7 @@ currently supported:
 * ``__MEMORY_SCOPE_SYSTEM``
 * ``__MEMORY_SCOPE_DEVICE``
 * ``__MEMORY_SCOPE_WRKGRP``
+* ``__MEMORY_SCOPE_CLUSTR``
 * ``__MEMORY_SCOPE_WVFRNT``
 * ``__MEMORY_SCOPE_SINGLE``
 
diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h
index 5a8d2a7dd02e5..614e5faa78696 100644
--- a/clang/include/clang/Basic/SyncScope.h
+++ b/clang/include/clang/Basic/SyncScope.h
@@ -43,11 +43,13 @@ enum class SyncScope {
   SystemScope,
   DeviceScope,
   WorkgroupScope,
+  ClusterScope,
   WavefrontScope,
   SingleScope,
   HIPSingleThread,
   HIPWavefront,
   HIPWorkgroup,
+  HIPCluster,
   HIPAgent,
   HIPSystem,
   OpenCLWorkGroup,
@@ -65,6 +67,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
     return "device_scope";
   case SyncScope::WorkgroupScope:
     return "workgroup_scope";
+  case SyncScope::ClusterScope:
+    return "cluster_scope";
   case SyncScope::WavefrontScope:
     return "wavefront_scope";
   case SyncScope::SingleScope:
@@ -75,6 +79,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
     return "hip_wavefront";
   case SyncScope::HIPWorkgroup:
     return "hip_workgroup";
+  case SyncScope::HIPCluster:
+    return "hip_cluster";
   case SyncScope::HIPAgent:
     return "hip_agent";
   case SyncScope::HIPSystem:
@@ -180,7 +186,10 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
     Workgroup = 3,
     Agent = 4,
     System = 5,
-    Last = System
+    Cluster = 6,
+    End,
+    Last = End - 1,
+    Count = Last
   };
 
   AtomicScopeHIPModel() {}
@@ -193,10 +202,14 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
       return SyncScope::HIPWavefront;
     case Workgroup:
       return SyncScope::HIPWorkgroup;
+    case Cluster:
+      return SyncScope::HIPCluster;
     case Agent:
       return SyncScope::HIPAgent;
     case System:
       return SyncScope::HIPSystem;
+    case End:
+      break;
     }
     llvm_unreachable("Invalid language sync scope value");
   }
@@ -207,11 +220,12 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
   }
 
   ArrayRef<unsigned> getRuntimeValues() const override {
-    static_assert(Last == System, "Does not include all sync scopes");
     static const unsigned Scopes[] = {
         static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront),
-        static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent),
-        static_cast<unsigned>(System)};
+        static_cast<unsigned>(Workgroup),    static_cast<unsigned>(Cluster),
+        static_cast<unsigned>(System),       static_cast<unsigned>(Agent)};
+    static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+                  "Does not include all sync scopes");
     return llvm::ArrayRef(Scopes);
   }
 
@@ -223,14 +237,17 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
 /// Defines the generic atomic scope model.
 class AtomicScopeGenericModel : public AtomicScopeModel {
 public:
-  /// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
+  /// The enum values match predefined built-in macros __MEMORY_SCOPE_*.
   enum ID {
     System = 0,
     Device = 1,
     Workgroup = 2,
     Wavefront = 3,
     Single = 4,
-    Last = Single
+    Cluster = 5,
+    End,
+    Last = End - 1,
+    Count = End
   };
 
   AtomicScopeGenericModel() = default;
@@ -243,10 +260,14 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
       return SyncScope::SystemScope;
     case Workgroup:
       return SyncScope::WorkgroupScope;
+    case Cluster:
+      return SyncScope::ClusterScope;
     case Wavefront:
       return SyncScope::WavefrontScope;
     case Single:
       return SyncScope::SingleScope;
+    case End:
+      break;
     }
     llvm_unreachable("Invalid language sync scope value");
   }
@@ -256,11 +277,12 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
   }
 
   ArrayRef<unsigned> getRuntimeValues() const override {
-    static_assert(Last == Single, "Does not include all sync scopes");
     static const unsigned Scopes[] = {
-        static_cast<unsigned>(Device), static_cast<unsigned>(System),
-        static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
-        static_cast<unsigned>(Single)};
+        static_cast<unsigned>(System),    static_cast<unsigned>(Device),
+        static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
+        static_cast<unsigned>(Wavefront), static_cast<unsigned>(Single)};
+    static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+                  "Does not include all sync scopes");
     return llvm::ArrayRef(Scopes);
   }
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec06199dc..97b5828011cd4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGBuiltin.h"
+#include "clang/Basic/SyncScope.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -242,33 +243,33 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
   }
 
   // Older builtins had an enum argument for the memory scope.
+  const char *ssn = nullptr;
   int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
   switch (scope) {
-  case 0: // __MEMORY_SCOPE_SYSTEM
+  case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
     SSID = llvm::SyncScope::System;
     break;
-  case 1: // __MEMORY_SCOPE_DEVICE
-    if (getTarget().getTriple().isSPIRV())
-      SSID = getLLVMContext().getOrInsertSyncScopeID("device");
-    else
-      SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+  case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
+    ssn = getTarget().getTriple().isSPIRV() ? "device" : "agent";
     break;
-  case 2: // __MEMORY_SCOPE_WRKGRP
-    SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
+  case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
+    ssn = "workgroup";
     break;
-  case 3: // __MEMORY_SCOPE_WVFRNT
-    if (getTarget().getTriple().isSPIRV())
-      SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
-    else
-      SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+  case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
+    ssn = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
+    break;
+  case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
+    ssn = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
     break;
-  case 4: // __MEMORY_SCOPE_SINGLE
+  case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
     SSID = llvm::SyncScope::SingleThread;
     break;
   default:
     SSID = llvm::SyncScope::System;
     break;
   }
+  if (ssn)
+    SSID = getLLVMContext().getOrInsertSyncScopeID(ssn);
 }
 
 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e458a34..c74a1b6098922 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -488,6 +488,10 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
   case SyncScope::WavefrontScope:
     Name = "wavefront";
     break;
+  case SyncScope::HIPCluster:
+  case SyncScope::ClusterScope:
+    Name = "cluster";
+    break;
   case SyncScope::HIPWorkgroup:
   case SyncScope::OpenCLWorkGroup:
   case SyncScope::WorkgroupScope:
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa63143a66cd..fbf29186faf24 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -90,6 +90,8 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
   case SyncScope::OpenCLSubGroup:
   case SyncScope::WavefrontScope:
     return "subgroup";
+  case SyncScope::HIPCluster:
+  case SyncScope::ClusterScope:
   case SyncScope::HIPWorkgroup:
   case SyncScope::OpenCLWorkGroup:
   case SyncScope::WorkgroupScope:
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b899fb9c6494a..21ab9dca8f0bd 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -616,6 +616,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
     Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
     Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
     Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
+    Builder.defineMacro("__HIP_MEMORY_SCOPE_CLUSTER", "6");
     if (LangOpts.HIPStdPar) {
       Builder.defineMacro("__HIPSTDPAR__");
       if (LangOpts.HIPStdParInterposeAlloc) {
@@ -904,6 +905,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   Builder.defineMacro("__MEMORY_SCOPE_WRKGRP", "2");
   Builder.defineMacro("__MEMORY_SCOPE_WVFRNT", "3");
   Builder.defineMacro("__MEMORY_SCOPE_SINGLE", "4");
+  Builder.defineMacro("__MEMORY_SCOPE_CLUSTR", "5");
 
   // Define macros for the OpenCL memory scope.
   // The values should match AtomicScopeOpenCLModel::ID enum.
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index 545a6c90892c2..c39048120a457 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -1,113 +1,772 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN:   -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN:   -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_DEF %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN:   -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN:   -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_20 %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
 // RUN:   -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
 
-// AMDGCN-LABEL: define hidden i32 @fi1a(
-// AMDGCN:    [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// AMDGCN:    [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent") monotonic, align 4
-// AMDGCN:    [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN:    [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN:    [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
-// SPIRV: define hidden spir_func i32 @fi1a(
-// SPIRV:    [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// SPIRV:    [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4
-// SPIRV:    [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// SPIRV:    [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4
-// SPIRV:    [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_DEF-NEXT:  [[ENTRY:.*:]]
+// AMDGCN_CL_DEF-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_DEF-NEXT:    [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_DEF-NEXT:    [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
+// AMDGCN_CL_DEF-NEXT:    store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP1]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP3]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP5]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP7]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP9]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT:    [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT:    store i32 [[TMP11]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    [[TMP12:%.*]] = load i32, ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT:    ret i32 [[TMP12]]
+//
+// AMDGCN_CL_20-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_20-NEXT:  [[ENTRY:.*:]]
+// AMDGCN_CL_20-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_20-NEXT:    [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_20-NEXT:    store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP1]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP5]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP7]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP9]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT:    [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_20-NEXT:    store i32 [[TMP11]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT:    [[TMP12:%.*]] = load i32, ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT:    ret i32 [[TMP12]]
+//
+// SPIRV-LABEL: define hidden spir_func i32 @fi1a(
+// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT:  [[ENTRY:.*:]]
+// SPIRV-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8
+// SPIRV-NEXT:    [[V:%.*]] = alloca i32, align 4
+// SPIRV-NEXT:    store ptr [[I]], ptr [[I_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP1]], ptr [[V]], align 4
+// SPIRV-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("device") monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP3]], ptr [[V]], align 4
+// SPIRV-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP5]], ptr [[V]], align 4
+// SPIRV-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP7]], ptr [[V]], align 4
+// SPIRV-NEXT:    [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("subgroup") monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP9]], ptr [[V]], align 4
+// SPIRV-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT:    [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// SPIRV-NEXT:    store i32 [[TMP11]], ptr [[V]], align 4
+// SPIRV-NEXT:    [[TMP12:%.*]] = load i32, ptr [[V]], align 4
+// SPIRV-NEXT:    ret i32 [[TMP12]]
+//
 int fi1a(int *i) {
   int v;
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
+  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
   __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
   return v;
 }
 
 // AMDGCN-LABEL: define hidden i32 @fi1b(
-// AMDGCN:    [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4
-// AMDGCN:    [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent") monotonic, align 4
-// AMDGCN:    [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN:    [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN:    [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT:  [[ENTRY:.*:]]
+// AMDGCN-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT:    [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT:    [[ATOMIC_...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/162575


More information about the cfe-commits mailing list