[clang] [clang] Add support for cluster sync scope (PR #162575)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 8 17:04:02 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: None (macurtis-amd)
<details>
<summary>Changes</summary>
---
Patch is 400.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162575.diff
15 Files Affected:
- (modified) clang/docs/HIPSupport.rst (+2)
- (modified) clang/docs/LanguageExtensions.rst (+1)
- (modified) clang/include/clang/Basic/SyncScope.h (+32-10)
- (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+15-14)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+4)
- (modified) clang/lib/CodeGen/Targets/SPIR.cpp (+2)
- (modified) clang/lib/Frontend/InitPreprocessor.cpp (+2)
- (modified) clang/test/CodeGen/scoped-atomic-ops.c (+4293-190)
- (modified) clang/test/CodeGen/scoped-fence-ops.c (+28-13)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl (+41-18)
- (modified) clang/test/Preprocessor/init-aarch64.c (+2)
- (modified) clang/test/Preprocessor/init-loongarch.c (+12-10)
- (modified) clang/test/Preprocessor/init.c (+11-6)
- (modified) clang/test/SemaCUDA/atomic-ops.cu (+7-2)
- (modified) clang/test/SemaCUDA/spirv-amdgcn-atomic-ops.cu (+7-2)
``````````diff
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index b4a671e3cfa3c..ec2af2a6f569d 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -164,6 +164,8 @@ Predefined Macros
- Represents wavefront memory scope in HIP (value is 2).
* - ``__HIP_MEMORY_SCOPE_WORKGROUP``
- Represents workgroup memory scope in HIP (value is 3).
+ * - ``__HIP_MEMORY_SCOPE_CLUSTER``
+ - Represents cluster memory scope in HIP (value is 6).
* - ``__HIP_MEMORY_SCOPE_AGENT``
- Represents agent memory scope in HIP (value is 4).
* - ``__HIP_MEMORY_SCOPE_SYSTEM``
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 6bb99c757cd19..bef6e9c14b182 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -4846,6 +4846,7 @@ currently supported:
* ``__MEMORY_SCOPE_SYSTEM``
* ``__MEMORY_SCOPE_DEVICE``
* ``__MEMORY_SCOPE_WRKGRP``
+* ``__MEMORY_SCOPE_CLUSTR``
* ``__MEMORY_SCOPE_WVFRNT``
* ``__MEMORY_SCOPE_SINGLE``
diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h
index 5a8d2a7dd02e5..614e5faa78696 100644
--- a/clang/include/clang/Basic/SyncScope.h
+++ b/clang/include/clang/Basic/SyncScope.h
@@ -43,11 +43,13 @@ enum class SyncScope {
SystemScope,
DeviceScope,
WorkgroupScope,
+ ClusterScope,
WavefrontScope,
SingleScope,
HIPSingleThread,
HIPWavefront,
HIPWorkgroup,
+ HIPCluster,
HIPAgent,
HIPSystem,
OpenCLWorkGroup,
@@ -65,6 +67,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
return "device_scope";
case SyncScope::WorkgroupScope:
return "workgroup_scope";
+ case SyncScope::ClusterScope:
+ return "cluster_scope";
case SyncScope::WavefrontScope:
return "wavefront_scope";
case SyncScope::SingleScope:
@@ -75,6 +79,8 @@ inline llvm::StringRef getAsString(SyncScope S) {
return "hip_wavefront";
case SyncScope::HIPWorkgroup:
return "hip_workgroup";
+ case SyncScope::HIPCluster:
+ return "hip_cluster";
case SyncScope::HIPAgent:
return "hip_agent";
case SyncScope::HIPSystem:
@@ -180,7 +186,10 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
Workgroup = 3,
Agent = 4,
System = 5,
- Last = System
+ Cluster = 6,
+ End,
+ Last = End - 1,
+ Count = Last
};
AtomicScopeHIPModel() {}
@@ -193,10 +202,14 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
return SyncScope::HIPWavefront;
case Workgroup:
return SyncScope::HIPWorkgroup;
+ case Cluster:
+ return SyncScope::HIPCluster;
case Agent:
return SyncScope::HIPAgent;
case System:
return SyncScope::HIPSystem;
+ case End:
+ break;
}
llvm_unreachable("Invalid language sync scope value");
}
@@ -207,11 +220,12 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
}
ArrayRef<unsigned> getRuntimeValues() const override {
- static_assert(Last == System, "Does not include all sync scopes");
static const unsigned Scopes[] = {
static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront),
- static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent),
- static_cast<unsigned>(System)};
+ static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
+ static_cast<unsigned>(System), static_cast<unsigned>(Agent)};
+ static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+ "Does not include all sync scopes");
return llvm::ArrayRef(Scopes);
}
@@ -223,14 +237,17 @@ class AtomicScopeHIPModel : public AtomicScopeModel {
/// Defines the generic atomic scope model.
class AtomicScopeGenericModel : public AtomicScopeModel {
public:
- /// The enum values match predefined built-in macros __ATOMIC_SCOPE_*.
+ /// The enum values match predefined built-in macros __MEMORY_SCOPE_*.
enum ID {
System = 0,
Device = 1,
Workgroup = 2,
Wavefront = 3,
Single = 4,
- Last = Single
+ Cluster = 5,
+ End,
+ Last = End - 1,
+ Count = End
};
AtomicScopeGenericModel() = default;
@@ -243,10 +260,14 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
return SyncScope::SystemScope;
case Workgroup:
return SyncScope::WorkgroupScope;
+ case Cluster:
+ return SyncScope::ClusterScope;
case Wavefront:
return SyncScope::WavefrontScope;
case Single:
return SyncScope::SingleScope;
+ case End:
+ break;
}
llvm_unreachable("Invalid language sync scope value");
}
@@ -256,11 +277,12 @@ class AtomicScopeGenericModel : public AtomicScopeModel {
}
ArrayRef<unsigned> getRuntimeValues() const override {
- static_assert(Last == Single, "Does not include all sync scopes");
static const unsigned Scopes[] = {
- static_cast<unsigned>(Device), static_cast<unsigned>(System),
- static_cast<unsigned>(Workgroup), static_cast<unsigned>(Wavefront),
- static_cast<unsigned>(Single)};
+ static_cast<unsigned>(System), static_cast<unsigned>(Device),
+ static_cast<unsigned>(Workgroup), static_cast<unsigned>(Cluster),
+ static_cast<unsigned>(Wavefront), static_cast<unsigned>(Single)};
+ static_assert(sizeof(Scopes) / sizeof(Scopes[0]) == Count,
+ "Does not include all sync scopes");
return llvm::ArrayRef(Scopes);
}
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 6596ec06199dc..97b5828011cd4 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CGBuiltin.h"
+#include "clang/Basic/SyncScope.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -242,33 +243,33 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
}
// Older builtins had an enum argument for the memory scope.
+ const char *ssn = nullptr;
int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
switch (scope) {
- case 0: // __MEMORY_SCOPE_SYSTEM
+ case AtomicScopeGenericModel::System: // __MEMORY_SCOPE_SYSTEM
SSID = llvm::SyncScope::System;
break;
- case 1: // __MEMORY_SCOPE_DEVICE
- if (getTarget().getTriple().isSPIRV())
- SSID = getLLVMContext().getOrInsertSyncScopeID("device");
- else
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ case AtomicScopeGenericModel::Device: // __MEMORY_SCOPE_DEVICE
+ ssn = getTarget().getTriple().isSPIRV() ? "device" : "agent";
break;
- case 2: // __MEMORY_SCOPE_WRKGRP
- SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
+ case AtomicScopeGenericModel::Workgroup: // __MEMORY_SCOPE_WRKGRP
+ ssn = "workgroup";
break;
- case 3: // __MEMORY_SCOPE_WVFRNT
- if (getTarget().getTriple().isSPIRV())
- SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
- else
- SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+ case AtomicScopeGenericModel::Cluster: // __MEMORY_SCOPE_CLUSTR
+ ssn = getTarget().getTriple().isSPIRV() ? "workgroup" : "cluster";
+ break;
+ case AtomicScopeGenericModel::Wavefront: // __MEMORY_SCOPE_WVFRNT
+ ssn = getTarget().getTriple().isSPIRV() ? "subgroup" : "wavefront";
break;
- case 4: // __MEMORY_SCOPE_SINGLE
+ case AtomicScopeGenericModel::Single: // __MEMORY_SCOPE_SINGLE
SSID = llvm::SyncScope::SingleThread;
break;
default:
SSID = llvm::SyncScope::System;
break;
}
+ if (ssn)
+ SSID = getLLVMContext().getOrInsertSyncScopeID(ssn);
}
llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 0fcbf7e458a34..c74a1b6098922 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -488,6 +488,10 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
case SyncScope::WavefrontScope:
Name = "wavefront";
break;
+ case SyncScope::HIPCluster:
+ case SyncScope::ClusterScope:
+ Name = "cluster";
+ break;
case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
case SyncScope::WorkgroupScope:
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 4aa63143a66cd..fbf29186faf24 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -90,6 +90,8 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
case SyncScope::OpenCLSubGroup:
case SyncScope::WavefrontScope:
return "subgroup";
+ case SyncScope::HIPCluster:
+ case SyncScope::ClusterScope:
case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
case SyncScope::WorkgroupScope:
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index b899fb9c6494a..21ab9dca8f0bd 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -616,6 +616,7 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
+ Builder.defineMacro("__HIP_MEMORY_SCOPE_CLUSTER", "6");
if (LangOpts.HIPStdPar) {
Builder.defineMacro("__HIPSTDPAR__");
if (LangOpts.HIPStdParInterposeAlloc) {
@@ -904,6 +905,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("__MEMORY_SCOPE_WRKGRP", "2");
Builder.defineMacro("__MEMORY_SCOPE_WVFRNT", "3");
Builder.defineMacro("__MEMORY_SCOPE_SINGLE", "4");
+ Builder.defineMacro("__MEMORY_SCOPE_CLUSTR", "5");
// Define macros for the OpenCL memory scope.
// The values should match AtomicScopeOpenCLModel::ID enum.
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index 545a6c90892c2..c39048120a457 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -1,113 +1,772 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_DEF %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
-// AMDGCN-LABEL: define hidden i32 @fi1a(
-// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent") monotonic, align 4
-// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
-// SPIRV: define hidden spir_func i32 @fi1a(
-// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
-// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4
-// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
-// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4
-// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
+// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_DEF-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
+// AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP1]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP5]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP9]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_DEF-NEXT: store i32 [[TMP11]], ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = load i32, ptr [[V_ASCAST]], align 4
+// AMDGCN_CL_DEF-NEXT: ret i32 [[TMP12]]
+//
+// AMDGCN_CL_20-LABEL: define hidden i32 @fi1a(
+// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
+// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
+// AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// AMDGCN_CL_20-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(5) [[V]], align 4
+// AMDGCN_CL_20-NEXT: ret i32 [[TMP12]]
+//
+// SPIRV-LABEL: define hidden spir_func i32 @fi1a(
+// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// SPIRV-NEXT: [[ENTRY:.*:]]
+// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
+// SPIRV-NEXT: [[V:%.*]] = alloca i32, align 4
+// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP1]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("device") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP3]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP5]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP7]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("subgroup") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP9]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// SPIRV-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
+// SPIRV-NEXT: store i32 [[TMP11]], ptr [[V]], align 4
+// SPIRV-NEXT: [[TMP12:%.*]] = load i32, ptr [[V]], align 4
+// SPIRV-NEXT: ret i32 [[TMP12]]
+//
int fi1a(int *i) {
int v;
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
+ __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
return v;
}
// AMDGCN-LABEL: define hidden i32 @fi1b(
-// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4
-// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent") monotonic, align 4
-// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
-// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront") monotonic, align 4
-// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
+// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGCN-NEXT: [[ENTRY:.*:]]
+// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// AMDGCN-NEXT: [[ATOMIC_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/162575
More information about the cfe-commits
mailing list