[clang] [AMDGPU][SPIRV] Use SPIR-V syncscopes for some AMDGCN BIs (PR #154867)
Alex Voicu via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 9 08:11:24 PDT 2025
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/154867
From f65fa1deedae45ad8f3d007e39c914c091387be2 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 22 Aug 2025 01:02:42 +0100
Subject: [PATCH 1/3] Use "device" instead of "agent" and "subgroup" instead of
"wavefront" for AMDGCN SPIR-V.
---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 23 +-
.../builtin-amdgcn-atomic-inc-dec.cpp | 423 +++++++++++++++++-
.../test/CodeGenCXX/builtin-amdgcn-fence.cpp | 103 ++++-
.../CodeGenOpenCL/builtins-amdgcn-gfx11.cl | 21 +-
.../test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 28 +-
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 3 +-
6 files changed, 572 insertions(+), 29 deletions(-)
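
[Editorial note] For readers skimming the patch: the behavioural change is that, when targeting AMDGCN-flavoured SPIR-V (spirv64-amd-amdhsa), the AMDGPU syncscope names "agent" and "wavefront" are emitted under their SPIR-V equivalents, "device" and "subgroup". An illustrative HIP sketch (the function name is hypothetical; the builtin and scope string are the ones exercised by the fence tests further down):

// Hypothetical example: the same source now lowers to different
// syncscopes depending on the target triple.
__attribute__((device)) void fence_agent_scope() {
  // -triple=amdgcn-amd-amdhsa  -> fence syncscope("agent") acquire
  // -triple=spirv64-amd-amdhsa -> fence syncscope("device") acquire
  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
}
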
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index dad1f95ac710d..5951569a00257 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -194,7 +194,7 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
-// \p Order takes a C++11 comptabile memory-ordering specifier and converts
+// \p Order takes a C++11 compatible memory-ordering specifier and converts
// it into LLVM's memory ordering specifier using atomic C ABI, and writes
// to \p AO. \p Scope takes a const char * and converts it into AMDGCN
// specific SyncScopeID and writes it to \p SSID.
@@ -227,6 +227,12 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
// Some of the atomic builtins take the scope as a string name.
StringRef scp;
if (llvm::getConstantStringInfo(Scope, scp)) {
+ if (getTarget().getTriple().isSPIRV()) {
+ if (scp == "agent")
+ scp = "device";
+ else if (scp == "wavefront")
+ scp = "subgroup";
+ }
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
return;
}
@@ -238,13 +244,19 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
SSID = llvm::SyncScope::System;
break;
case 1: // __MEMORY_SCOPE_DEVICE
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
break;
case 2: // __MEMORY_SCOPE_WRKGRP
SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
break;
case 3: // __MEMORY_SCOPE_WVFRNT
- SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("subgroup");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
break;
case 4: // __MEMORY_SCOPE_SINGLE
SSID = llvm::SyncScope::SingleThread;
@@ -1381,7 +1393,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
//
// The global/flat cases need to use agent scope to consistently produce
// the native instruction instead of a cmpxchg expansion.
- SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+ if (getTarget().getTriple().isSPIRV())
+ SSID = getLLVMContext().getOrInsertSyncScopeID("device");
+ else
+ SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
AO = AtomicOrdering::Monotonic;
// The v2bf16 builtin uses i16 instead of a natural bfloat type.
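
[Editorial note] Distilled from the hunks above, the change amounts to a per-target rename of two scope strings. A minimal sketch of that mapping, assuming a free-standing helper (the helper name is hypothetical; the real logic lives inline in ProcessOrderScopeAMDGCN and EmitAMDGPUBuiltinExpr):

#include "llvm/ADT/StringRef.h"

// Sketch only: translate an AMDGPU scope name into the name used for the
// LLVM syncscope when the target is AMDGCN SPIR-V.
static llvm::StringRef remapScopeForSPIRV(llvm::StringRef Scope) {
  if (Scope == "agent")
    return "device";    // SPIR-V spells the device-wide scope "device".
  if (Scope == "wavefront")
    return "subgroup";  // SPIR-V spells the wave-wide scope "subgroup".
  return Scope;         // "workgroup" and "" (system) are shared.
}
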
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
index 5920ceda4a811..751985a76f493 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: @_Z29test_non_volatile_parameter32Pj(
// CHECK-NEXT: entry:
@@ -21,6 +24,43 @@
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter32Pj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -47,6 +87,43 @@ __attribute__((device)) void test_non_volatile_parameter32(__UINT32_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z29test_non_volatile_parameter64Py(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z29test_non_volatile_parameter64Py(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -73,6 +150,43 @@ __attribute__((device)) void test_non_volatile_parameter64(__UINT64_TYPE__ *ptr)
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter32PVj(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[TMP5]], align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr [[RES_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter32PVj(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i32, ptr addrspace(4) [[TMP1]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i32, ptr addrspace(4) [[TMP5]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i32 [[TMP6]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__ *ptr) {
__UINT32_TYPE__ res;
@@ -99,6 +213,43 @@ __attribute__((device)) void test_volatile_parameter32(volatile __UINT32_TYPE__
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z25test_volatile_parameter64PVy(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// GCN-NEXT: [[RES:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR_ADDR]] to ptr
+// GCN-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RES]] to ptr
+// GCN-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load volatile i64, ptr [[TMP1]], align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP5:%.*]] = load ptr, ptr [[PTR_ADDR_ASCAST]], align 8
+// GCN-NEXT: [[TMP6:%.*]] = load volatile i64, ptr [[TMP5]], align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr [[RES_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z25test_volatile_parameter64PVy(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// AMDGCNSPIRV-NEXT: [[RES:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[PTR_ADDR]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[RES_ASCAST:%.*]] = addrspacecast ptr [[RES]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: store ptr addrspace(4) [[PTR:%.*]], ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load volatile i64, ptr addrspace(4) [[TMP1]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw volatile uinc_wrap ptr addrspace(4) [[TMP0]], i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[PTR_ADDR_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load volatile i64, ptr addrspace(4) [[TMP5]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw volatile udec_wrap ptr addrspace(4) [[TMP4]], i64 [[TMP6]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) [[RES_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__ *ptr) {
__UINT64_TYPE__ res;
@@ -116,6 +267,25 @@ __attribute__((device)) void test_volatile_parameter64(volatile __UINT64_TYPE__
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -134,6 +304,25 @@ __attribute__((device)) void test_shared32() {
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_shared64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_shared64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ13test_shared64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_shared64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -153,6 +342,25 @@ __attribute__((device)) __UINT32_TYPE__ global_val32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val32 to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global32() {
global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
@@ -170,6 +378,25 @@ __attribute__((device)) __UINT64_TYPE__ global_val64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z13test_global64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(1) @global_val64 to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z13test_global64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global_val64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_global64() {
global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
@@ -189,6 +416,29 @@ __attribute__((constant)) __UINT32_TYPE__ cval32;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval32 to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant32() {
__UINT32_TYPE__ local_val;
@@ -210,6 +460,29 @@ __attribute__((constant)) __UINT64_TYPE__ cval64;
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z15test_constant64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8, addrspace(5)
+// GCN-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LOCAL_VAL]] to ptr
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(4) @cval64 to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr [[LOCAL_VAL_ASCAST]], align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z15test_constant64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8
+// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_constant64() {
__UINT64_TYPE__ local_val;
@@ -240,6 +513,49 @@ __attribute__((device)) void test_constant64() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP8:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP10:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("workgroup") acquire, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("workgroup") release, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP8]] syncscope("workgroup") acq_rel, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), i32 [[TMP10]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -278,6 +594,49 @@ __attribute__((device)) void test_order32() {
// CHECK-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_order64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP8:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP9]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP10:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP11]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_order64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") monotonic, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("workgroup") acquire, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("workgroup") release, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP8:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP9:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP8]] syncscope("workgroup") acq_rel, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP9]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP10:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP11:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), i64 [[TMP10]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP11]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_order64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_order64() {
__attribute__((shared)) __UINT64_TYPE__ val;
@@ -310,6 +669,37 @@ __attribute__((device)) void test_order64() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope32v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP4:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP4]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP6:%.*]] = load i32, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), i32 [[TMP6]] syncscope("wavefront") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i32 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr), align 4
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope32v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP0]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP4]] syncscope("device") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), i32 [[TMP6]] syncscope("subgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i32 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope32vE3val to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope32() {
__attribute__((shared)) __UINT32_TYPE__ val;
@@ -338,6 +728,37 @@ __attribute__((device)) void test_scope32() {
// CHECK-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
// CHECK-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
// CHECK-NEXT: ret void
+// GCN-LABEL: @_Z12test_scope64v(
+// GCN-NEXT: entry:
+// GCN-NEXT: [[TMP0:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP1]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP2:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP3]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP4:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP4]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP5]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP6:%.*]] = load i64, ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), i64 [[TMP6]] syncscope("wavefront") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META4]]
+// GCN-NEXT: store i64 [[TMP7]], ptr addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr), align 8
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: @_Z12test_scope64v(
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP0]] seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP5:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP4]] syncscope("device") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP5]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP6:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP7:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), i64 [[TMP6]] syncscope("subgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: store i64 [[TMP7]], ptr addrspace(4) addrspacecast (ptr addrspace(3) @_ZZ12test_scope64vE3val to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: ret void
//
__attribute__((device)) void test_scope64() {
__attribute__((shared)) __UINT64_TYPE__ val;
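
[Editorial note] As a usage-level summary of what the checks above pin down: the scope string passed to the builtin is carried through verbatim on amdgcn-amd-amdhsa but renamed on AMDGCN SPIR-V. A small HIP sketch (hypothetical function name; the call mirrors the "agent" case in test_scope32):

__attribute__((device)) __UINT32_TYPE__
inc_at_agent_scope(__UINT32_TYPE__ *ptr, __UINT32_TYPE__ val) {
  // -triple=amdgcn-amd-amdhsa  -> atomicrmw ... syncscope("agent")
  // -triple=spirv64-amd-amdhsa -> atomicrmw ... syncscope("device")
  return __builtin_amdgcn_atomic_inc32(ptr, val, __ATOMIC_SEQ_CST, "agent");
}
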
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index 1e977dd6420f4..dd1ca459d68b5 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,7 +1,10 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// REQUIRES: amdgpu-registered-target
+// REQUIRES: spirv-registered-target
// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
-// RUN: -triple=amdgcn-amd-amdhsa | FileCheck %s
+// RUN: -triple=amdgcn-amd-amdhsa | FileCheck --check-prefix=GCN %s
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
+// RUN: -triple=spirv64-amd-amdhsa | FileCheck --check-prefix=AMDGCNSPIRV %s
// CHECK-LABEL: define dso_local void @_Z25test_memory_fence_successv(
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -12,6 +15,25 @@
// CHECK-NEXT: fence syncscope("agent") acq_rel
// CHECK-NEXT: fence syncscope("workgroup") release
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z25test_memory_fence_successv(
+// GCN-SAME: ) #[[ATTR0:[0-9]+]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst
+// GCN-NEXT: fence syncscope("agent") acquire
+// GCN-NEXT: fence seq_cst
+// GCN-NEXT: fence syncscope("agent") acq_rel
+// GCN-NEXT: fence syncscope("workgroup") release
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z25test_memory_fence_successv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire
+// AMDGCNSPIRV-NEXT: fence seq_cst
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release
+// AMDGCNSPIRV-NEXT: ret void
//
void test_memory_fence_success() {
@@ -35,6 +57,25 @@ void test_memory_fence_success() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_localv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_localv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_local() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -58,6 +99,25 @@ void test_local() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z11test_globalv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META4]]
+// GCN-NEXT: fence seq_cst, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META4]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z11test_globalv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META4:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META4]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_global() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "global");
@@ -80,6 +140,25 @@ void test_global() {
// CHECK-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
// CHECK-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_imagev(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acquire, !mmra [[META3]]
+// GCN-NEXT: fence seq_cst, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("agent") acq_rel, !mmra [[META3]]
+// GCN-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_imagev(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acquire, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence seq_cst, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("device") acq_rel, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") release, !mmra [[META3]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_image() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local");
@@ -99,13 +178,33 @@ void test_image() {
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
// CHECK-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
// CHECK-NEXT: ret void
+// GCN-LABEL: define dso_local void @_Z10test_mixedv(
+// GCN-SAME: ) #[[ATTR0]] {
+// GCN-NEXT: entry:
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// GCN-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// GCN-NEXT: ret void
+//
+// AMDGCNSPIRV-LABEL: define spir_func void @_Z10test_mixedv(
+// AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0]] {
+// AMDGCNSPIRV-NEXT: entry:
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5:![0-9]+]]
+// AMDGCNSPIRV-NEXT: fence syncscope("workgroup") seq_cst, !mmra [[META5]]
+// AMDGCNSPIRV-NEXT: ret void
//
void test_mixed() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "global");
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local");
}
-//.
// CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
// CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
// CHECK: [[META5]] = !{[[META4]], [[META3]]}
//.
+// GCN: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// GCN: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// GCN: [[META5]] = !{[[META4]], [[META3]]}
+//.
+// AMDGCNSPIRV: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// AMDGCNSPIRV: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
+// AMDGCNSPIRV: [[META5]] = !{[[META4]], [[META3]]}
+//.
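[editor's note] Taken together, the fence checks show that only the scope string is remapped; the orderings and the !mmra address-space metadata pass through untouched. A minimal sketch of the source pattern behind test_memory_fence_success, assumed from its generated checks:

    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); // workgroup on both targets
    __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");     // "agent" -> "device" on SPIR-V
    __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");          // system scope on both
    __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");     // "agent" -> "device" on SPIR-V
    __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");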
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index 19ab6562e52b9..7cd3f1417844c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -1,13 +1,13 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1101 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1102 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1103 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
typedef unsigned int uint;
typedef unsigned long ulong;
@@ -50,7 +50,8 @@ void test_s_wait_event_export_ready() {
}
// CHECK-LABEL: @test_global_add_f32
-// CHECK: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// GCN: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
+// AMDGCNSPIRV: = atomicrmw fadd ptr addrspace(1) %addr, float %x syncscope("device") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
#if !defined(__SPIRV__)
void test_global_add_f32(float *rtn, global float *addr, float x) {
#else
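[editor's note] The rest of test_global_add_f32 is elided above; assuming it follows the usual pattern for this test, the body is a single call with no scope operand, so the syncscope in the checks is supplied entirely by CodeGen rather than by the source:

    // Assumed body; __builtin_amdgcn_global_atomic_fadd_f32 takes no scope
    // argument, so CodeGen picks "agent" (or "device" when targeting SPIR-V).
    *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);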
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 5f202baa8a592..6bb20bff436fb 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -1,9 +1,9 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
@@ -252,9 +252,11 @@ void test_update_dpp_const_int(global int* out, int arg1)
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fadd ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
#if !defined(__SPIRV__)
@@ -293,9 +295,11 @@ void test_ds_faddf(local float *out, float src) {
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
@@ -334,9 +338,11 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("device") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
-// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// GCN: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// AMDGCNSPIRV: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("subgroup") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
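[editor's note] Each scoped row above corresponds to one __builtin_amdgcn_ds_faddf/ds_fminf/ds_fmaxf call; these builtins take the scope as a string literal, which is exactly what the new mapping rewrites. The usual call shape in this test (arguments: pointer, value, C ABI memory order, scope string, volatile flag):

    // "agent" lowers to syncscope("agent") for amdgcn and to
    // syncscope("device") for spirv64-amd-amdhsa.
    *out = __builtin_amdgcn_ds_faddf(out, src, __ATOMIC_RELAXED, "agent", false);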
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index bf022bc6eb446..8a07f33e67090 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -853,7 +853,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu
// CHECK: atomicrmw udec_wrap ptr addrspace(3) %lptr, i32 %val syncscope("workgroup") seq_cst, align 4
res = __builtin_amdgcn_atomic_dec32(lptr, val, __ATOMIC_SEQ_CST, "workgroup");
- // CHECK: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-AMDGCN: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("agent") seq_cst, align 4
+ // CHECK-SPIRV: atomicrmw uinc_wrap ptr addrspace(1) %gptr, i32 %val syncscope("device") seq_cst, align 4
res = __builtin_amdgcn_atomic_inc32(gptr, val, __ATOMIC_SEQ_CST, "agent");
// CHECK: atomicrmw udec_wrap ptr addrspace(1) %gptr, i32 %val seq_cst, align 4
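[editor's note] The RUN lines of builtins-amdgcn.cl are not part of this hunk; presumably they were extended along these lines so that the two new prefixes resolve (the exact flags are not shown here):

    // RUN: %clang_cc1 ... -triple amdgcn-unknown-unknown ... | FileCheck --check-prefixes=CHECK,CHECK-AMDGCN %s
    // RUN: %clang_cc1 ... -triple spirv64-amd-amdhsa ... | FileCheck --check-prefixes=CHECK,CHECK-SPIRV %s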
>From 4d6413cefd1a95fb97af2f174a2949e2fd9c621f Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 22 Aug 2025 16:18:03 +0100
Subject: [PATCH 2/3] Hoist scope mapping into named function.
---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 5951569a00257..f686773025e10 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -192,6 +192,14 @@ static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
return CGF.Builder.CreateCall(F, {Src0, Src1});
}
+static inline StringRef mapScopeToSPIRV(StringRef AMDGCNScope) {
+ if (AMDGCNScope == "agent")
+ return "device";
+ if (AMDGCNScope == "wavefront")
+ return "subgroup";
+ return AMDGCNScope;
+}
+
// For processing memory ordering and memory scope arguments of various
// amdgcn builtins.
// \p Order takes a C++11 compatible memory-ordering specifier and converts
@@ -227,12 +235,8 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
// Some of the atomic builtins take the scope as a string name.
StringRef scp;
if (llvm::getConstantStringInfo(Scope, scp)) {
- if (getTarget().getTriple().isSPIRV()) {
- if (scp == "agent")
- scp = "device";
- else if (scp == "wavefront")
- scp = "subgroup";
- }
+ if (getTarget().getTriple().isSPIRV())
+ scp = mapScopeToSPIRV(scp);
SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
return;
}
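[editor's note] The hoisted helper is total: every scope name other than the two renamed ones passes through unchanged, so "workgroup", "singlethread", and the empty (system) scope are unaffected. A self-contained sketch of the same mapping, usable to sanity-check the table outside of clang:

    #include <cassert>
    #include <string_view>

    // Mirrors mapScopeToSPIRV from the patch; not clang code itself.
    static std::string_view mapScopeToSPIRV(std::string_view AMDGCNScope) {
      if (AMDGCNScope == "agent")
        return "device";
      if (AMDGCNScope == "wavefront")
        return "subgroup";
      return AMDGCNScope; // "workgroup", "singlethread", "" pass through.
    }

    int main() {
      assert(mapScopeToSPIRV("agent") == "device");
      assert(mapScopeToSPIRV("wavefront") == "subgroup");
      assert(mapScopeToSPIRV("workgroup") == "workgroup");
      assert(mapScopeToSPIRV("") == "");
    }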
>From e744e4d02304351db2ec92d2cf274e2919ece82f Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Fri, 22 Aug 2025 18:55:04 +0100
Subject: [PATCH 3/3] Fix test.
---
.../CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
index 751985a76f493..137a49beee9a6 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -432,11 +432,11 @@ __attribute__((constant)) __UINT32_TYPE__ cval32;
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i32, align 4
// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), align 4
-// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP0]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
// AMDGCNSPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
-// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), align 4
-// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), align 4
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval32 to ptr addrspace(4)), i32 [[TMP2]] syncscope("workgroup") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META5]]
// AMDGCNSPIRV-NEXT: store i32 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 4
// AMDGCNSPIRV-NEXT: ret void
//
@@ -476,11 +476,11 @@ __attribute__((constant)) __UINT64_TYPE__ cval64;
// AMDGCNSPIRV-NEXT: entry:
// AMDGCNSPIRV-NEXT: [[LOCAL_VAL:%.*]] = alloca i64, align 8
// AMDGCNSPIRV-NEXT: [[LOCAL_VAL_ASCAST:%.*]] = addrspacecast ptr [[LOCAL_VAL]] to ptr addrspace(4)
-// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), align 8
-// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = atomicrmw uinc_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP0]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
// AMDGCNSPIRV-NEXT: store i64 [[TMP1]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
-// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), align 8
-// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(2) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
+// AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), align 8
+// AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = atomicrmw udec_wrap ptr addrspace(4) addrspacecast (ptr addrspace(1) @cval64 to ptr addrspace(4)), i64 [[TMP2]] syncscope("workgroup") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META5]]
// AMDGCNSPIRV-NEXT: store i64 [[TMP3]], ptr addrspace(4) [[LOCAL_VAL_ASCAST]], align 8
// AMDGCNSPIRV-NEXT: ret void
//
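[editor's note] These check updates only adjust the address space the AMDGCNSPIRV checks expect for __attribute__((constant)) globals (LLVM addrspace(1) rather than addrspace(2)); the scope handling itself is unchanged. The shape of the code under test, reconstructed from the checks (the function name is assumed):

    __attribute__((constant)) __UINT32_TYPE__ cval32;

    __attribute__((device)) void test_constant32() { // name assumed
      __UINT32_TYPE__ local_val =
          __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
      local_val =
          __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
    }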