[clang] [llvm] [SPIRV][RFC] Rework / extend support for memory scopes (PR #106429)
Alex Voicu via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 18 13:48:09 PDT 2024
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/106429
>From d41faf6da8a9eed8c32f6a62fa9ebf38d5824c2c Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Sun, 11 Aug 2024 01:39:46 +0300
Subject: [PATCH 1/8] Tweak AMDGCNSPIRV ABI to allow for the correct handling
of aggregates passed to kernels / functions.
---
clang/lib/CodeGen/Targets/SPIR.cpp | 73 +-
.../amdgpu-kernel-arg-pointer-type.cu | 723 ++++++++++++++++--
clang/test/CodeGenCUDA/kernel-args.cu | 6 +
3 files changed, 731 insertions(+), 71 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index cf068cbc4fcd36..1319332635b863 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -32,7 +32,9 @@ class SPIRVABIInfo : public CommonSPIRABIInfo {
void computeInfo(CGFunctionInfo &FI) const override;
private:
+ ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
+ ABIArgInfo classifyArgumentType(QualType Ty) const;
};
} // end anonymous namespace
namespace {
@@ -64,6 +66,27 @@ void CommonSPIRABIInfo::setCCs() {
RuntimeCC = llvm::CallingConv::SPIR_FUNC;
}
+ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const {
+ if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+ return DefaultABIInfo::classifyReturnType(RetTy);
+ if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI()))
+ return DefaultABIInfo::classifyReturnType(RetTy);
+
+ if (const RecordType *RT = RetTy->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyReturnType(RetTy);
+ }
+
+ // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; to avoid
+ // encoding various architecture-specific bits here, we return everything
+ // as direct, retaining type info for things like aggregates for later
+ // perusal when reverse-translating / lowering in the AMDGPU BE. This is
+ // also why we disable flattening, as the outcomes can mismatch between
+ // SPIR-V and AMDGPU. This will be revisited / optimised in the future.
+ return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false);
+}
+
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
if (getContext().getLangOpts().CUDAIsDevice) {
// Coerce pointer arguments with default address space to CrossWorkGroup
@@ -78,18 +101,52 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
- // Force copying aggregate type in kernel arguments by value when
- // compiling CUDA targeting SPIR-V. This is required for the object
- // copied to be valid on the device.
- // This behavior follows the CUDA spec
- // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
- // and matches the NVPTX implementation.
- if (isAggregateTypeForABI(Ty))
- return getNaturalAlignIndirect(Ty, /* byval */ true);
+ if (isAggregateTypeForABI(Ty)) {
+ if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
+ // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
+ // currently expressible in SPIR-V; SPIR-V passes aggregates byval,
+ // which the AMDGPU kernel ABI does not allow. Passing aggregates as
+ // direct works around this impedance mismatch, as it retains type info
+ // and can be correctly handled, post reverse-translation, by the AMDGPU
+ // BE, which has to support this CC for legacy OpenCL purposes. It can
+ // be brittle and does lead to performance degradation in certain
+ // pathological cases. This will be revisited / optimised in the future,
+ // once a way to deal with the byref/byval impedance mismatch is
+ // identified.
+ return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
+ else
+ // Force copying aggregate type in kernel arguments by value when
+ // compiling CUDA targeting SPIR-V. This is required for the object
+ // copied to be valid on the device.
+ // This behavior follows the CUDA spec
+ // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
+ // and matches the NVPTX implementation.
+ return getNaturalAlignIndirect(Ty, /* byval */ true);
+ }
}
return classifyArgumentType(Ty);
}
+ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const {
+ if (getTarget().getTriple().getVendor() != llvm::Triple::AMD)
+ return DefaultABIInfo::classifyArgumentType(Ty);
+ if (!isAggregateTypeForABI(Ty))
+ return DefaultABIInfo::classifyArgumentType(Ty);
+
+ // Records with non-trivial destructors/copy-constructors should not be
+ // passed by value.
+ if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
+ return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
+
+ if (const RecordType *RT = Ty->getAs<RecordType>()) {
+ const RecordDecl *RD = RT->getDecl();
+ if (RD->hasFlexibleArrayMember())
+ return DefaultABIInfo::classifyArgumentType(Ty);
+ }
+
+ return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false);
+}
+
void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
// The logic is the same as in DefaultABIInfo, with an exception for kernel
// argument handling.
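For reference, the non-kernel classification added above can be modelled standalone. In the sketch below, Pass, Ty and classifyArgument are illustrative stand-ins rather than clang's actual types; the real code queries isAggregateTypeForABI, getRecordArgABI and hasFlexibleArrayMember on AST types.

#include <iostream>

enum class Pass { Default, Direct, Indirect };

struct Ty {
  bool Aggregate = false;
  bool NonTrivialCopyOrDtor = false; // i.e. getRecordArgABI() != RAA_Default
  bool FlexibleArrayMember = false;
};

// Stand-in for SPIRVABIInfo::classifyArgumentType: only the AMD vendor gets
// the new behaviour; everything else defers to DefaultABIInfo.
Pass classifyArgument(const Ty &T, bool VendorIsAMD) {
  if (!VendorIsAMD || !T.Aggregate)
    return Pass::Default;
  if (T.NonTrivialCopyOrDtor)
    return Pass::Indirect; // must not be passed by value
  if (T.FlexibleArrayMember)
    return Pass::Default;  // size is not statically known
  return Pass::Direct;     // retain type info; flattening disabled
}

int main() {
  Ty S;
  S.Aggregate = true;
  std::cout << (classifyArgument(S, true) == Pass::Direct) << '\n';   // 1
  std::cout << (classifyArgument(S, false) == Pass::Default) << '\n'; // 1
}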
diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
index 70c86cbb8c3d40..b295bbbdaaf955 100644
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -1,8 +1,11 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK-SPIRV %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT
+// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT-SPIRV
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
#include "Inputs/cuda.h"
@@ -11,41 +14,260 @@
// global ones.
// On the host-side compilation, generic pointers won't be coerced.
-// HOST-NOT: %struct.S.coerce
-// HOST-NOT: %struct.T.coerce
-
-// HOST: define{{.*}} void @_Z22__device_stub__kernel1Pi(ptr noundef %x)
-// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(ptr addrspace(1){{.*}} %x.coerce)
-// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
-// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}}
-// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
-// OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4
-// OPT: ret void
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi(
+// CHECK-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
+// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi(
+// OPT-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi(
+// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi(
+// HOST-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
+// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel1Pi)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
+//
__global__ void kernel1(int *x) {
x[0]++;
}
-// HOST: define{{.*}} void @_Z22__device_stub__kernel2Ri(ptr noundef nonnull align 4 dereferenceable(4) %x)
-// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(ptr addrspace(1){{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
-// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
-// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}}
-// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
-// OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4
-// OPT: ret void
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri(
+// CHECK-SAME: ptr addrspace(1) noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
+// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
+// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri(
+// OPT-SAME: ptr addrspace(1) nocapture noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri(
+// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri(
+// HOST-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
+// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel2Ri)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
+//
__global__ void kernel2(int &x) {
x++;
}
-// HOST: define{{.*}} void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(ptr addrspace(2) noundef %x, ptr addrspace(1) noundef %y)
-// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(ptr addrspace(2){{.*}} %x, ptr addrspace(1){{.*}} %y)
-// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
+// CHECK-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8, addrspace(5)
+// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[Y_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
+// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
+// CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
+// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr [[Y_ADDR]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: store ptr addrspace(2) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[Y]], ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0
+// CHECK-SPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
+// OPT-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
+// OPT-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i(
+// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4
+// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(
+// HOST-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8
+// HOST-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8
+// HOST-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR]], align 8
+// HOST-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
+// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[Y_ADDR]], i64 8, i64 8)
+// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT1:.*]], label %[[SETUP_END]]
+// HOST: [[SETUP_NEXT1]]:
+// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel3PU3AS2iPU3AS1i)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
+//
__global__ void kernel3(__attribute__((address_space(2))) int *x,
__attribute__((address_space(1))) int *y) {
y[0] = x[0];
}
-// COMMON-LABEL: define{{.*}} void @_Z4funcPi(ptr{{.*}} %x)
-// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
+// CHECK-LABEL: define dso_local void @_Z4funcPi(
+// CHECK-SAME: ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_func void @_Z4funcPi(
+// CHECK-SPIRV-SAME: ptr addrspace(4) noundef [[X:%.*]]) addrspace(4) #[[ATTR1:[0-9]+]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local void @_Z4funcPi(
+// OPT-SAME: ptr nocapture noundef [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-NEXT: store i32 [[INC]], ptr [[X]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi(
+// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[X]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
__device__ void func(int *x) {
x[0]++;
}
@@ -57,29 +279,202 @@ struct S {
// A `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address-inferring pass should be able to
// assume the pointers it contains are global ones.
+// For SPIR-V, since byref is not supported at the moment, we pass it as direct.
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S(
+// CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_S]], align 8, addrspace(5)
+// CHECK-NEXT: [[S:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[S]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Y]], align 8
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
+// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8
+// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
+// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
+// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP2]], align 8
+// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP4]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1
+// CHECK-SPIRV-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP6]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
+// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP7]], 1.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S(
+// OPT-SAME: ptr addrspace(4) nocapture noundef readonly byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4:![0-9]+]]
+// OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
+// OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8
+// OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
+// OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
+// OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]
+// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[TMP1]], align 4
+// OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4
+// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00
+// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP2]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S(
+// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel41S(
+// HOST-SAME: ptr [[S_COERCE0:%.*]], ptr [[S_COERCE1:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 0
+// HOST-NEXT: store ptr [[S_COERCE0]], ptr [[TMP0]], align 8
+// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 1
+// HOST-NEXT: store ptr [[S_COERCE1]], ptr [[TMP1]], align 8
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[S]], i64 16, i64 0)
+// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel41S)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
//
-// HOST: define{{.*}} void @_Z22__device_stub__kernel41S(ptr %s.coerce0, ptr %s.coerce1)
-// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(ptr addrspace(4){{.*}} byref(%struct.S) align 8 %0)
-// OPT: [[P0:%.*]] = load ptr, ptr addrspace(4) %0, align 8
-// OPT: [[G0:%.*]] ={{.*}} addrspacecast ptr [[P0]] to ptr addrspace(1)
-// OPT: [[R1:%.*]] = getelementptr inbounds i8, ptr addrspace(4) %0, i64 8
-// OPT: [[P1:%.*]] = load ptr, ptr addrspace(4) [[R1]], align 8
-// OPT: [[G1:%.*]] ={{.*}} addrspacecast ptr [[P1]] to ptr addrspace(1)
-// OPT: [[V0:%.*]] = load i32, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD:[0-9]+]]
-// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
-// OPT: store i32 [[INC]], ptr addrspace(1) [[G0]], align 4
-// OPT: [[V1:%.*]] = load float, ptr addrspace(1) [[G1]], align 4
-// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
-// OPT: store float [[ADD]], ptr addrspace(1) [[G1]], align 4
-// OPT: ret void
__global__ void kernel4(struct S s) {
s.x[0]++;
s.y[0] += 1.f;
}
// If a pointer to struct is passed, only the pointer itself is coerced into the global one.
-// HOST: define{{.*}} void @_Z22__device_stub__kernel5P1S(ptr noundef %s)
-// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(ptr addrspace(1){{.*}} %s.coerce)
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
+// CHECK-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
+// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr [[S_ASCAST]], align 8
+// CHECK-NEXT: [[S1:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[S1]], ptr [[S_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP3]], i32 0, i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[Y]], align 8
+// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 0
+// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
+// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr [[S_ADDR]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr addrspace(4) [[S_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[S1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[S1]], ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr addrspace(4) [[TMP0]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[TMP3]], i32 0, i32 1
+// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
+// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S(
+// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8
+// OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
+// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// OPT-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
+// OPT-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8
+// OPT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(1) [[Y]], align 8
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
+// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
+// OPT-NEXT: store float [[ADD]], ptr [[TMP2]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S(
+// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8
+// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4
+// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8
+// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8
+// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4
+// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S(
+// HOST-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[S_ADDR]], i64 8, i64 0)
+// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel5P1S)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
+//
__global__ void kernel5(struct S *s) {
s->x[0]++;
s->y[0] += 1.f;
@@ -91,29 +486,174 @@ struct T {
// A `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address-inferring pass should be able to
// assume the pointers it contains are global ones.
+// For SPIR-V, since byref is not supported at the moment, we pass it as direct.
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T(
+// CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_T]], align 8, addrspace(5)
+// CHECK-NEXT: [[T:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
+// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[T]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false)
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X]], i64 0, i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0
+// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X2]], i64 0, i64 1
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8
+// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
+// CHECK-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00
+// CHECK-NEXT: store float [[ADD5]], ptr [[ARRAYIDX4]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
+// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8
+// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
+// CHECK-SPIRV-NEXT: store [2 x ptr addrspace(4)] [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
+// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X]], i64 0, i64 0
+// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4
+// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4
+// CHECK-SPIRV-NEXT: [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X3]], i64 0, i64 1
+// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX4]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX5]], align 4
+// CHECK-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP5]], 2.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[ARRAYIDX5]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T(
+// OPT-SAME: ptr addrspace(4) nocapture noundef readonly byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4]]
+// OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1)
+// OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8
+// OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]]
+// OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1)
+// OPT-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]]
+// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00
+// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP1]], align 4
+// OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4
+// OPT-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00
+// OPT-NEXT: store float [[ADD5]], ptr addrspace(1) [[TMP2]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T(
+// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0
+// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0
+// OPT-SPIRV-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 1
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
+// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 1.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
+// OPT-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP2]], 2.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel61T(
+// HOST-SAME: ptr [[T_COERCE0:%.*]], ptr [[T_COERCE1:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[T:%.*]] = alloca [[STRUCT_T:%.*]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 0
+// HOST-NEXT: store ptr [[T_COERCE0]], ptr [[TMP0]], align 8
+// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 1
+// HOST-NEXT: store ptr [[T_COERCE1]], ptr [[TMP1]], align 8
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[T]], i64 16, i64 0)
+// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
+// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel61T)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
//
-// HOST: define{{.*}} void @_Z22__device_stub__kernel61T(ptr %t.coerce0, ptr %t.coerce1)
-// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(ptr addrspace(4){{.*}} byref(%struct.T) align 8 %0)
-// OPT: [[P0:%.*]] = load ptr, ptr addrspace(4) %0, align 8
-// OPT: [[G0:%.*]] ={{.*}} addrspacecast ptr [[P0]] to ptr addrspace(1)
-// OPT: [[R1:%.*]] = getelementptr inbounds i8, ptr addrspace(4) %0, i64 8
-// OPT: [[P1:%.*]] = load ptr, ptr addrspace(4) [[R1]], align 8
-// OPT: [[G1:%.*]] ={{.*}} addrspacecast ptr [[P1]] to ptr addrspace(1)
-// OPT: [[V0:%.*]] = load float, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD]]
-// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
-// OPT: store float [[ADD0]], ptr addrspace(1) [[G0]], align 4
-// OPT: [[V1:%.*]] = load float, ptr addrspace(1) [[G1]], align 4
-// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
-// OPT: store float [[ADD1]], ptr addrspace(1) [[G1]], align 4
-// OPT: ret void
__global__ void kernel6(struct T t) {
t.x[0][0] += 1.f;
t.x[1][0] += 2.f;
}
// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
-// HOST: define{{.*}} void @_Z22__device_stub__kernel7Pi(ptr noalias noundef %x)
-// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(ptr addrspace(1) noalias{{.*}} %x.coerce)
+
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi(
+// CHECK-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
+// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
+// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8
+// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8
+// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
+// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi(
+// OPT-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1
+// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi(
+// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
+// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1
+// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi(
+// HOST-SAME: ptr noalias noundef [[X:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0)
+// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel7Pi)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
+//
__global__ void kernel7(int *__restrict x) {
x[0]++;
}
@@ -122,13 +662,70 @@ __global__ void kernel7(int *__restrict x) {
struct SS {
float *x;
};
-// HOST: define{{.*}} void @_Z22__device_stub__kernel82SS(ptr %a.coerce)
-// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(ptr addrspace(1){{.*}} %a.coerce)
-// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr
-// OPT: [[VAL:%.*]] = load float, ptr addrspace(1) %a.coerce, align 4{{$}}
-// OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00
-// OPT: store float [[INC]], ptr addrspace(1) %a.coerce, align 4
-// OPT: ret void
+// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS(
+// CHECK-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8, addrspace(5)
+// CHECK-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0
+// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[TMP0]], align 4
+// CHECK-NEXT: ret void
+//
+// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
+// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] {
+// CHECK-SPIRV-NEXT: [[ENTRY:.*:]]
+// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8
+// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4)
+// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
+// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8
+// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0
+// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8
+// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4
+// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 3.000000e+00
+// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4
+// CHECK-SPIRV-NEXT: ret void
+//
+// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS(
+// OPT-SAME: ptr addrspace(1) nocapture [[A_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// OPT-NEXT: [[ENTRY:.*:]]
+// OPT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[A_COERCE]], align 4
+// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP0]], 3.000000e+00
+// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[A_COERCE]], align 4
+// OPT-NEXT: ret void
+//
+// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS(
+// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] {
+// OPT-SPIRV-NEXT: [[ENTRY:.*:]]
+// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0
+// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00
+// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4
+// OPT-SPIRV-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel82SS(
+// HOST-SAME: ptr [[A_COERCE:%.*]]) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
+// HOST-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A]], i32 0, i32 0
+// HOST-NEXT: store ptr [[A_COERCE]], ptr [[COERCE_DIVE]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[A]], i64 8, i64 0)
+// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]]
+// HOST: [[SETUP_NEXT]]:
+// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel82SS)
+// HOST-NEXT: br label %[[SETUP_END]]
+// HOST: [[SETUP_END]]:
+// HOST-NEXT: ret void
+//
__global__ void kernel8(struct SS a) {
*a.x += 3.f;
}
+//.
+// OPT: [[META4]] = !{}
+//.
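The coercions exercised above boil down to one rule in classifyKernelArgumentType: a kernel pointer parameter in the default (generic) address space is coerced to CrossWorkGroup (global), while an explicitly qualified pointer keeps its address space. A toy model of that rule follows; the numeric values mirror the SPIR-V flavour shown in the checks (generic = addrspace(4), global = addrspace(1)) and are stand-ins, not clang's LangAS enums.

#include <iostream>

// Generic-AS kernel pointer params are coerced to global; anything else is
// left untouched (e.g. the addrspace(2) parameter of kernel3 above).
int coerceKernelPtrAS(int AS) {
  const int Generic = 4, Global = 1;
  return AS == Generic ? Global : AS;
}

int main() {
  std::cout << coerceKernelPtrAS(4) << '\n'; // 1: int* -> ptr addrspace(1)
  std::cout << coerceKernelPtrAS(2) << '\n'; // 2: AS(2) pointer kept as-is
}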
diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu
index bcce729f14481c..8d17d89b315dec 100644
--- a/clang/test/CodeGenCUDA/kernel-args.cu
+++ b/clang/test/CodeGenCUDA/kernel-args.cu
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s
+// RUN: %clang_cc1 -x hip -triple spirv64-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCNSPIRV %s
// RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \
// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s
#include "Inputs/cuda.h"
@@ -10,6 +12,7 @@ struct A {
};
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
+// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(%struct.A %{{.+}})
// NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x)
__global__ void kernel(A x) {
}
@@ -17,6 +20,7 @@ __global__ void kernel(A x) {
class Kernel {
public:
// AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}})
+ // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %{{.+}})
// NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x)
static __global__ void memberKernel(A x){}
template<typename T> static __global__ void templateMemberKernel(T x) {}
@@ -31,10 +35,12 @@ void launch(void*);
void test() {
Kernel K;
// AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}
+ // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(%struct.A %{{.+}})
// NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)templateKernel<A>);
// AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}
+ // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %{{.+}}
// NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x)
launch((void*)Kernel::templateMemberKernel<A>);
}
>From 757e119809bc5e088eb85118aae028b24e062081 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Sun, 11 Aug 2024 02:27:25 +0300
Subject: [PATCH 2/8] Fix formatting error.
---
clang/lib/CodeGen/Targets/SPIR.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 1319332635b863..3fca9c52d5c290 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -101,7 +101,7 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
- if (isAggregateTypeForABI(Ty)) {
+ if (isAggregateTypeForABI(Ty)) {
if (getTarget().getTriple().getVendor() == llvm::Triple::AMD)
// TODO: The AMDGPU kernel ABI passes aggregates byref, which is not
// currently expressible in SPIR-V; SPIR-V passes aggregates byval,
>From 13f83ac0870182c0e56ce4fbdc6815da6be256c1 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Mon, 19 Aug 2024 17:45:19 +0100
Subject: [PATCH 3/8] No else after return.
---
clang/lib/CodeGen/Targets/SPIR.cpp | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index 3fca9c52d5c290..cc52925e2e523f 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -114,14 +114,13 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
// once a way to deal with the byref/byval impedance mismatch is
// identified.
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
- else
- // Force copying aggregate type in kernel arguments by value when
- // compiling CUDA targeting SPIR-V. This is required for the object
- // copied to be valid on the device.
- // This behavior follows the CUDA spec
- // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
- // and matches the NVPTX implementation.
- return getNaturalAlignIndirect(Ty, /* byval */ true);
+ // Force copying aggregate type in kernel arguments by value when
+ // compiling CUDA targeting SPIR-V. This is required for the object
+ // copied to be valid on the device.
+ // This behavior follows the CUDA spec
+ // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
+ // and matches the NVPTX implementation.
+ return getNaturalAlignIndirect(Ty, /* byval */ true);
}
}
return classifyArgumentType(Ty);
>From daa76c36453b2e133d7d9496ca930d0eaa742fab Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 28 Aug 2024 17:25:29 +0100
Subject: [PATCH 4/8] Re-work SPIR-V support for memory scopes.
---
clang/lib/Basic/Targets/SPIR.h | 6 +
clang/lib/CodeGen/CGAtomic.cpp | 11 +-
clang/lib/CodeGen/Targets/SPIR.cpp | 40 +++
clang/test/CodeGen/scoped-atomic-ops.c | 336 ++++++++++++------
clang/test/Sema/scoped-atomic-ops.c | 1 +
llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 3 +-
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 39 +-
llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 18 +
llvm/lib/Target/SPIRV/SPIRVUtils.h | 2 +
.../CodeGen/SPIRV/AtomicCompareExchange.ll | 6 +-
llvm/test/CodeGen/SPIRV/atomicrmw.ll | 26 +-
.../atomicrmw_faddfsub_double.ll | 7 +-
.../atomicrmw_faddfsub_float.ll | 7 +-
.../atomicrmw_faddfsub_half.ll | 7 +-
.../atomicrmw_fminfmax_double.ll | 7 +-
.../atomicrmw_fminfmax_float.ll | 7 +-
.../atomicrmw_fminfmax_half.ll | 7 +-
llvm/test/CodeGen/SPIRV/fence.ll | 10 +-
.../CodeGen/SPIRV/instructions/atomic-ptr.ll | 2 +-
.../test/CodeGen/SPIRV/instructions/atomic.ll | 33 +-
.../SPIRV/instructions/atomic_acqrel.ll | 4 +-
.../CodeGen/SPIRV/instructions/atomic_seq.ll | 4 +-
llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll | 163 +++++++++
23 files changed, 543 insertions(+), 203 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 37cf9d7921bac5..8a26db7971cba6 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -335,6 +335,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo {
PointerWidth = PointerAlign = 32;
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
+ // SPIR-V has core support for atomic ops, and Int32 is always available;
+ // we take the maximum because the host may support wider types.
+ MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
}
@@ -356,6 +359,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo {
PointerWidth = PointerAlign = 64;
SizeType = TargetInfo::UnsignedLong;
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
+ // SPIR-V has core support for atomic ops, and Int64 is always available;
+ // we take the maximum because the host may support wider types.
+ MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 64);
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
}
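For context, MaxAtomicInlineWidth is what the frontend consults when deciding between emitting a native atomic instruction and falling back to an __atomic_* libcall. A minimal sketch of that decision, with hypothetical names (the real logic lives in clang's CGAtomic):

#include <cstdint>

enum class AtomicLowering { Inline, Libcall };

// Operations on types no wider than MaxAtomicInlineWidth can use a native
// atomic instruction; anything wider goes through the runtime library.
AtomicLowering chooseAtomicLowering(uint64_t SizeInBits,
                                    unsigned MaxAtomicInlineWidth) {
  return SizeInBits <= MaxAtomicInlineWidth ? AtomicLowering::Inline
                                            : AtomicLowering::Libcall;
}

With the two hunks above, 32- and 64-bit atomics on spirv32/spirv64 should stay on the inline path, which the scoped-atomic-ops.c checks further down rely on.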
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index fbe9569e50ef63..ba6ee4c0be3b7f 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -766,8 +766,17 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
// LLVM atomic instructions always have synch scope. If clang atomic
// expression has no scope operand, use default LLVM synch scope.
if (!ScopeModel) {
+ llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");
+ if (CGF.getLangOpts().OpenCL)
+ // The OpenCL approach is: "The functions that do not have memory_scope
+ // argument have the same semantics as the corresponding functions with
+ // the memory_scope argument set to memory_scope_device." See:
+ // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
+ SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(),
+ SyncScope::OpenCLDevice,
+ Order, CGF.getLLVMContext());
EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
- Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID(""));
+ Order, SS);
return;
}
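As a hedged illustration of the behavioural change (this snippet is not part of the patch): a scope-less atomic builtin such as the one below, compiled in OpenCL mode, should now be emitted with the device scope rather than the anonymous default scope.

// Plain C/C++ builtin with no memory_scope argument; per the hunk above,
// OpenCL compilations now map it to SyncScope::OpenCLDevice, i.e.
// syncscope("device") on the emitted atomicrmw.
int fetch_add_default_scope(int *p) {
  return __atomic_fetch_add(p, 1, __ATOMIC_RELAXED);
}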
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index cc52925e2e523f..a90741c0c0d324 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -58,6 +58,10 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
+ llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const override;
};
} // End anonymous namespace.
@@ -188,6 +192,42 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
}
}
+llvm::SyncScope::ID
+SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &,
+ SyncScope Scope,
+ llvm::AtomicOrdering,
+ llvm::LLVMContext &Ctx) const {
+ std::string Name;
+ switch (Scope) {
+ case SyncScope::HIPSingleThread:
+ case SyncScope::SingleScope:
+ Name = "singlethread";
+ break;
+ case SyncScope::HIPWavefront:
+ case SyncScope::OpenCLSubGroup:
+ case SyncScope::WavefrontScope:
+ Name = "subgroup";
+ break;
+ case SyncScope::HIPWorkgroup:
+ case SyncScope::OpenCLWorkGroup:
+ case SyncScope::WorkgroupScope:
+ Name = "workgroup";
+ break;
+ case SyncScope::HIPAgent:
+ case SyncScope::OpenCLDevice:
+ case SyncScope::DeviceScope:
+ Name = "device";
+ break;
+ case SyncScope::SystemScope:
+ case SyncScope::HIPSystem:
+ case SyncScope::OpenCLAllSVMDevices:
+ Name = "all_svm_devices";
+ break;
+ }
+
+ return Ctx.getOrInsertSyncScopeID(Name);
+}
+
/// Construct a SPIR-V target extension type for the given OpenCL image type.
static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType,
StringRef OpenCLName,
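The hook above only produces named SyncScope::IDs; they become visible once attached to an instruction. A self-contained sketch (scaffolding assumed, not taken from the patch) of how a named scope lands on an atomicrmw via the IRBuilder API:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

void emitWorkgroupScopedAdd(Module &M) {
  LLVMContext &Ctx = M.getContext();
  IRBuilder<> B(Ctx);
  auto *FTy = FunctionType::get(B.getVoidTy(), {B.getPtrTy()}, false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
  // The string registered here is what prints as syncscope("workgroup"),
  // matching the name getLLVMSyncScopeID returns for workgroup scope.
  SyncScope::ID WG = Ctx.getOrInsertSyncScopeID("workgroup");
  B.CreateAtomicRMW(AtomicRMWInst::Add, F->getArg(0), B.getInt32(1),
                    MaybeAlign(4), AtomicOrdering::Monotonic, WG);
  B.CreateRetVoid();
}

Printed back this yields atomicrmw add ptr %0, i32 1 syncscope("workgroup") monotonic, align 4, which is the shape the updated scoped-atomic-ops.c checks expect.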
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index b0032046639b89..24f1613e8af4e8 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -1,12 +1,21 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
-// RUN: -fvisibility=hidden | FileCheck %s
+// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
+// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
-// CHECK-LABEL: define hidden i32 @fi1a(
-// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi1a(
+// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi1a(
+// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4
int fi1a(int *i) {
int v;
__scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -17,13 +26,18 @@ int fi1a(int *i) {
return v;
}
-// CHECK-LABEL: define hidden i32 @fi1b(
-// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
-//
+// AMDGCN-LABEL: define hidden i32 @fi1b(
+// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi1b(
+// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
int fi1b(int *i) {
*i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
*i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
@@ -33,13 +47,18 @@ int fi1b(int *i) {
return *i;
}
-// CHECK-LABEL: define hidden void @fi2a(
-// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
-//
+// AMDGCN-LABEL: define hidden void @fi2a(
+// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi2a(
+// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
void fi2a(int *i) {
int v = 1;
__scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -49,12 +68,18 @@ void fi2a(int *i) {
__scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden void @fi2b(
-// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi2b(
+// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi2b(
+// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4
void fi2b(int *i) {
__scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
__scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
@@ -63,15 +88,24 @@ void fi2b(int *i) {
__scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden void @fi3a(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3a(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3a(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("all_svm_devices") monotonic, align 4
void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -83,15 +117,24 @@ void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden void @fi3b(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3b(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3b(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("device") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("device") monotonic, align 4
void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
@@ -103,15 +146,24 @@ void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden void @fi3c(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3c(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3c(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup") monotonic, align 4
void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
@@ -123,15 +175,24 @@ void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden void @fi3d(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3d(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3d(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("subgroup") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("subgroup") monotonic, align 4
void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
@@ -143,15 +204,24 @@ void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden void @fi3e(
-// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4
-// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden void @fi3e(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func void @fi3e(
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread") monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread") monotonic, align 4
void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
@@ -163,8 +233,10 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4a(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4a(
+// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4a(
+// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4
_Bool fi4a(int *i) {
int cmp = 0;
int desired = 1;
@@ -173,8 +245,10 @@ _Bool fi4a(int *i) {
__MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4b(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4b(
+// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4b(
+// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4
_Bool fi4b(int *i) {
int cmp = 0;
int desired = 1;
@@ -183,8 +257,10 @@ _Bool fi4b(int *i) {
__MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4c(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4c(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4c(
+// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4
_Bool fi4c(int *i) {
int cmp = 0;
int desired = 1;
@@ -193,8 +269,10 @@ _Bool fi4c(int *i) {
__MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4d(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4d(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4d(
+// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4
_Bool fi4d(int *i) {
int cmp = 0;
int desired = 1;
@@ -203,8 +281,10 @@ _Bool fi4d(int *i) {
__MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi4e(
-// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi4e(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4e(
+// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4
_Bool fi4e(int *i) {
int cmp = 0;
int desired = 1;
@@ -213,8 +293,10 @@ _Bool fi4e(int *i) {
__MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5a(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5a(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5a(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4
_Bool fi5a(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE,
@@ -222,8 +304,10 @@ _Bool fi5a(int *i) {
__MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5b(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5b(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5b(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4
_Bool fi5b(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE,
@@ -231,101 +315,127 @@ _Bool fi5b(int *i) {
__MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5c(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5c(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5c(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4
_Bool fi5c(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(
i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5d(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5d(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5d(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4
_Bool fi5d(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(
i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi5e(
-// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// AMDGCN-LABEL: define hidden zeroext i1 @fi5e(
+// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5e(
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4
_Bool fi5e(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(
i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SINGLE);
}
-// CHECK-LABEL: define hidden i32 @fi6a(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6a(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6a(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4
int fi6a(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6b(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6b(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6b(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4
int fi6b(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6c(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6c(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6c(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4
int fi6c(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6d(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6d(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6d(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4
int fi6d(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
return ret;
}
-// CHECK-LABEL: define hidden i32 @fi6e(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// AMDGCN-LABEL: define hidden i32 @fi6e(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4
+// SPIRV-LABEL: define hidden spir_func i32 @fi6e(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4
int fi6e(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
return ret;
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7a(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7a(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7a(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 1
_Bool fi7a(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_SYSTEM);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7b(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7b(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7b(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("device") monotonic, align 1
_Bool fi7b(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_DEVICE);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7c(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7c(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7c(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup") monotonic, align 1
_Bool fi7c(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_WRKGRP);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7d(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7d(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7d(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("subgroup") monotonic, align 1
_Bool fi7d(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_WVFRNT);
}
-// CHECK-LABEL: define hidden zeroext i1 @fi7e(
-// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1
+// AMDGCN-LABEL: define hidden zeroext i1 @fi7e(
+// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1
+// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7e(
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread") monotonic, align 1
_Bool fi7e(_Bool *c) {
- return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
+ return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_SINGLE);
}
diff --git a/clang/test/Sema/scoped-atomic-ops.c b/clang/test/Sema/scoped-atomic-ops.c
index 59e638c646664c..33044aa256cb01 100644
--- a/clang/test/Sema/scoped-atomic-ops.c
+++ b/clang/test/Sema/scoped-atomic-ops.c
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -x c -triple=amdgcn-amd-amdhsa -verify -fsyntax-only %s
// RUN: %clang_cc1 -x c -triple=x86_64-pc-linux-gnu -verify -fsyntax-only %s
+// RUN: %clang_cc1 -x c -triple=spirv64-unknown-unknown -verify -fsyntax-only %s
int fi1a(int *i) {
int v;
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index 4175f766ac69ad..7b7ccaf7cd7654 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -1353,7 +1353,8 @@ Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
SmallVector<Value *> Args;
for (auto &Op : I.operands())
Args.push_back(Op);
- Args.push_back(B.getInt32(I.getSyncScopeID()));
+ Args.push_back(B.getInt32(
+ static_cast<uint32_t>(getMemScope(I.getContext(), I.getSyncScopeID()))));
Args.push_back(B.getInt32(
static_cast<uint32_t>(getMemSemantics(I.getSuccessOrdering()))));
Args.push_back(B.getInt32(
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 9e10d947081cc3..8957a8b11e92a3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -33,7 +33,8 @@
#include "llvm/Support/Debug.h"
namespace {
-
+// TODO: consider removing this altogether and merely inserting the scopes
+// directly in SetupMF.
struct SyncScopeIDs {
llvm::SyncScope::ID Work_ItemSSID;
llvm::SyncScope::ID WorkGroupSSID;
@@ -43,11 +44,11 @@ struct SyncScopeIDs {
SyncScopeIDs() {}
SyncScopeIDs(llvm::LLVMContext &Context) {
- Work_ItemSSID = Context.getOrInsertSyncScopeID("work_item");
+ Work_ItemSSID = Context.getOrInsertSyncScopeID("singlethread");
WorkGroupSSID = Context.getOrInsertSyncScopeID("workgroup");
DeviceSSID = Context.getOrInsertSyncScopeID("device");
AllSVMDevicesSSID = Context.getOrInsertSyncScopeID("all_svm_devices");
- SubGroupSSID = Context.getOrInsertSyncScopeID("sub_group");
+ SubGroupSSID = Context.getOrInsertSyncScopeID("subgroup");
}
};
@@ -781,28 +782,6 @@ bool SPIRVInstructionSelector::selectBitcast(Register ResVReg,
return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast);
}
-static SPIRV::Scope::Scope getScope(SyncScope::ID Ord,
- const SyncScopeIDs &SSIDs) {
- if (Ord == SyncScope::SingleThread || Ord == SSIDs.Work_ItemSSID)
- return SPIRV::Scope::Invocation;
- else if (Ord == SyncScope::System || Ord == SSIDs.DeviceSSID)
- return SPIRV::Scope::Device;
- else if (Ord == SSIDs.WorkGroupSSID)
- return SPIRV::Scope::Workgroup;
- else if (Ord == SSIDs.AllSVMDevicesSSID)
- return SPIRV::Scope::CrossDevice;
- else if (Ord == SSIDs.SubGroupSSID)
- return SPIRV::Scope::Subgroup;
- else
- // OpenCL approach is: "The functions that do not have memory_scope argument
- // have the same semantics as the corresponding functions with the
- // memory_scope argument set to memory_scope_device." See ref.: //
- // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
- // In our case if the scope is unknown, assuming that SPIR-V code is to be
- // consumed in an OpenCL environment, we use the same approach and set the
- // scope to memory_scope_device.
- return SPIRV::Scope::Device;
-}
static void addMemoryOperands(MachineMemOperand *MemOp,
MachineInstrBuilder &MIB) {
@@ -957,7 +936,8 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg,
assert(I.hasOneMemOperand());
const MachineMemOperand *MemOp = *I.memoperands_begin();
uint32_t Scope =
- static_cast<uint32_t>(getScope(MemOp->getSyncScopeID(), SSIDs));
+ static_cast<uint32_t>(getMemScope(GR.CurMF->getFunction().getContext(),
+ MemOp->getSyncScopeID()));
Register ScopeReg = buildI32Constant(Scope, I);
Register Ptr = I.getOperand(1).getReg();
@@ -1028,7 +1008,9 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const {
uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
Register MemSemReg = buildI32Constant(MemSem, I);
SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm());
- uint32_t Scope = static_cast<uint32_t>(getScope(Ord, SSIDs));
+ uint32_t Scope =
+ static_cast<uint32_t>(getMemScope(GR.CurMF->getFunction().getContext(),
+ Ord));
Register ScopeReg = buildI32Constant(Scope, I);
MachineBasicBlock &BB = *I.getParent();
return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier))
@@ -1048,7 +1030,8 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
assert(I.hasOneMemOperand());
const MachineMemOperand *MemOp = *I.memoperands_begin();
unsigned Scope =
- static_cast<uint32_t>(getScope(MemOp->getSyncScopeID(), SSIDs));
+ static_cast<uint32_t>(getMemScope(GR.CurMF->getFunction().getContext(),
+ MemOp->getSyncScopeID()));
ScopeReg = buildI32Constant(Scope, I);
unsigned ScSem = static_cast<uint32_t>(
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 927683ad7e32be..15f577f0e1fc39 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -251,6 +251,24 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
llvm_unreachable(nullptr);
}
+SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID) {
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+
+ StringRef MemScope = SSNs[ID];
+ if (MemScope.empty() || MemScope == "all_svm_devices")
+ return SPIRV::Scope::CrossDevice;
+ if (MemScope == "device")
+ return SPIRV::Scope::Device;
+ if (MemScope == "workgroup")
+ return SPIRV::Scope::Workgroup;
+ if (MemScope == "subgroup")
+ return SPIRV::Scope::Subgroup;
+ if (MemScope == "singlethread")
+ return SPIRV::Scope::Invocation;
+ return SPIRV::Scope::Device; // Follow OpenCL convention for now.
+}
+
MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
const MachineRegisterInfo *MRI) {
MachineInstr *MI = MRI->getVRegDef(ConstReg);
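A quick usage sketch for the new helper (hypothetical scaffolding; assumes the SPIRV target headers are on the include path): resolving registered scope names back to SPIR-V scopes, including the fallback for the unnamed default scope.

#include "SPIRVUtils.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

using namespace llvm;

void checkScopeMapping() {
  LLVMContext Ctx;
  SyncScope::ID SubGroup = Ctx.getOrInsertSyncScopeID("subgroup");
  assert(getMemScope(Ctx, SubGroup) == SPIRV::Scope::Subgroup);
  // SyncScope::System is registered under the empty name, so it takes the
  // MemScope.empty() branch and lowers to CrossDevice.
  assert(getMemScope(Ctx, SyncScope::System) == SPIRV::Scope::CrossDevice);
}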
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index c757af6b8aa72c..cad94fb36ee49a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -75,6 +75,8 @@ getMemSemanticsForStorageClass(SPIRV::StorageClass::StorageClass SC);
SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord);
+SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID);
+
// Find def instruction for the given ConstReg, walking through
// spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def
// of OpConstant instruction.
diff --git a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll
index 323afec7f35f87..9e19413a15db1f 100644
--- a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll
+++ b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll
@@ -1,7 +1,7 @@
; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV
; CHECK-SPIRV: %[[#Int:]] = OpTypeInt 32 0
-; CHECK-SPIRV-DAG: %[[#MemScope_Device:]] = OpConstant %[[#Int]] 1
+; CHECK-SPIRV-DAG: %[[#MemScope_AllSvmDevices:]] = OpConstant %[[#Int]] 0
; CHECK-SPIRV-DAG: %[[#MemSemEqual_SeqCst:]] = OpConstant %[[#Int]] 16
; CHECK-SPIRV-DAG: %[[#MemSemUnequal_Acquire:]] = OpConstant %[[#Int]] 2
; CHECK-SPIRV-DAG: %[[#Constant_456:]] = OpConstant %[[#Int]] 456
@@ -11,7 +11,7 @@
; CHECK-SPIRV-DAG: %[[#UndefStruct:]] = OpUndef %[[#Struct]]
; CHECK-SPIRV: %[[#Value:]] = OpLoad %[[#Int]] %[[#Value_ptr:]]
-; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_Device]]
+; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_AllSvmDevices]]
; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Value]] %[[#Comparator:]]
; CHECK-SPIRV: %[[#Success:]] = OpIEqual %[[#]] %[[#Res]] %[[#Comparator]]
; CHECK-SPIRV: %[[#Composite_0:]] = OpCompositeInsert %[[#Struct]] %[[#Res]] %[[#UndefStruct]] 0
@@ -34,7 +34,7 @@ cmpxchg.continue: ; preds = %cmpxchg.store_expec
ret void
}
-; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_Device]]
+; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_AllSvmDevices]]
; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Constant_456]] %[[#Constant_128]]
; CHECK-SPIRV: %[[#Success_1:]] = OpIEqual %[[#]] %[[#Res_1]] %[[#Constant_128]]
; CHECK-SPIRV: %[[#Composite:]] = OpCompositeInsert %[[#Struct]] %[[#Res_1]] %[[#UndefStruct]] 0
diff --git a/llvm/test/CodeGen/SPIRV/atomicrmw.ll b/llvm/test/CodeGen/SPIRV/atomicrmw.ll
index 5f95a974ba671a..fa80bc97ab7cd3 100644
--- a/llvm/test/CodeGen/SPIRV/atomicrmw.ll
+++ b/llvm/test/CodeGen/SPIRV/atomicrmw.ll
@@ -5,8 +5,8 @@
; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
; CHECK: %[[#Int:]] = OpTypeInt 32 0
-; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1{{$}}
-; CHECK-DAG: %[[#MemSem_Relaxed:]] = OpConstant %[[#Int]] 0
+; CHECK-DAG: %[[#Scope_AllSvmDevices:]] = OpConstant %[[#Int]] 0{{$}}
+;; No separate MemSem_Relaxed constant: relaxed semantics encodes as 0, so %[[#Scope_AllSvmDevices]] is re-used for both the scope and the semantics operands.
; CHECK-DAG: %[[#MemSem_Acquire:]] = OpConstant %[[#Int]] 2
; CHECK-DAG: %[[#MemSem_Release:]] = OpConstant %[[#Int]] 4{{$}}
; CHECK-DAG: %[[#MemSem_AcquireRelease:]] = OpConstant %[[#Int]] 8
@@ -25,37 +25,37 @@
define dso_local spir_func void @test_atomicrmw() local_unnamed_addr {
entry:
%0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 acq_rel
-; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]]
%1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst
-; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]]
+; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]]
%2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 monotonic
-; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#Scope_AllSvmDevices]] %[[#Value]]
%3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 acquire
-; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Acquire]] %[[#Value]]
%4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 release
-; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Release]] %[[#Value]]
%5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 acq_rel
-; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]]
%6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst
-; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]]
%7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 monotonic
-; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#Scope_AllSvmDevices]] %[[#Value]]
%8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 acquire
-; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Acquire]] %[[#Value]]
%9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 release
-; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Release]] %[[#Value]]
%10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 acq_rel
-; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]]
+; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]]
ret void
}
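
Two encoding facts drive the churn in the CHECK lines above: the default
(empty) syncscope now lowers to the AllSvmDevices scope, encoded as 0, and
LLVM's "monotonic" ordering lowers to relaxed memory semantics, also encoded
as 0, so a single OpConstant serves both operands. For reference, the SPIR-V
Memory Semantics bits these tests rely on (a sketch of the spec's constants,
not in-tree code):

  #include <cstdint>

  // SPIR-V Memory Semantics <id> bitmask values used by the CHECK lines.
  enum MemorySemantics : uint32_t {
    Relaxed = 0x0, // "monotonic"
    Acquire = 0x2,
    Release = 0x4,
    AcquireRelease = 0x8,
    SequentiallyConsistent = 0x10, // "seq_cst"
  };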
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll
index 14035a68c81aa4..c2ed2f8f62fc8d 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll
@@ -10,13 +10,14 @@
; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0
; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42
-; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16
+; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP64]]
; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]]
-; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
+; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]]
; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP64]] %[[Const42]]
-; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]]
+; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]]
; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll
index d34811496e5a15..075e63ea6de61a 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll
@@ -10,15 +10,16 @@
; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0
; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
+; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16
; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
; CHECK-DAG: %[[ScopeWorkgroup:[0-9]+]] = OpConstant %[[TyInt32]] 2
-; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16
; CHECK-DAG: %[[WorkgroupMemory:[0-9]+]] = OpConstant %[[TyInt32]] 512
; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP32]]
; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]]
-; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
+; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]]
; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]]
-; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]]
+; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]]
; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeWorkgroup]] %[[WorkgroupMemory]] %[[Const42]]
; CHECK: %[[Neg42:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]]
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll
index 7da99411ae5306..2c938409846d3a 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll
@@ -13,13 +13,14 @@
; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0
; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP16]] 20800
-; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16
+; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]]
; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]]
-; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
+; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]]
; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP16]] %[[Const42]]
-; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]]
+; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]]
; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll
index a2d0a594c861d2..fdc05f4eac06be 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll
@@ -10,12 +10,13 @@
; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0
; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42
-; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16
+; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP64]]
; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]]
-; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
-; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
+; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]]
+; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]]
; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll
index 896b7acc1c87b2..a7ff448a98b98d 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll
@@ -10,12 +10,13 @@
; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0
; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42
-; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16
+; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP32]]
; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]]
-; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
-; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
+; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]]
+; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]]
; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]]
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll
index b3f48711707a14..d5576d1911a8b9 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll
@@ -10,12 +10,13 @@
; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0
; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0
; CHECK-DAG: %[[ConstHalf:[0-9]+]] = OpConstant %[[TyFP16]] 20800
-; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
+; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]]
; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16
+; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1
; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]]
; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]]
-; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]]
-; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]]
+; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]]
+; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]]
; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]]
; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]]
diff --git a/llvm/test/CodeGen/SPIRV/fence.ll b/llvm/test/CodeGen/SPIRV/fence.ll
index 5da58667f24f29..c7496c15f2c950 100644
--- a/llvm/test/CodeGen/SPIRV/fence.ll
+++ b/llvm/test/CodeGen/SPIRV/fence.ll
@@ -3,16 +3,16 @@
; CHECK-DAG: OpName %[[#GetScope:]] "_Z8getScopev"
; CHECK-DAG: %[[#Long:]] = OpTypeInt 32 0
-; CHECK-DAG: %[[#ScopeDevice:]] = OpConstant %[[#Long]] 1
; CHECK-DAG: %[[#WrkGrpConst2:]] = OpConstant %[[#Long]] 2
-; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3
+; CHECK-DAG: %[[#ScopeAllSvmDevices:]] = OpConstantNull %[[#Long]]
; CHECK-DAG: %[[#InvocationConst4:]] = OpConstant %[[#Long]] 4
; CHECK-DAG: %[[#Const8:]] = OpConstant %[[#Long]] 8
; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Long]] 16
+; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3
; CHECK-DAG: %[[#Const912:]] = OpConstant %[[#Long]] 912
-; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#WrkGrpConst2]]
-; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#InvocationConst4]]
-; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#Const8]]
+; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#WrkGrpConst2]]
+; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#InvocationConst4]]
+; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#Const8]]
; CHECK: OpMemoryBarrier %[[#InvocationConst4]] %[[#Const16]]
; CHECK: OpMemoryBarrier %[[#WrkGrpConst2]] %[[#InvocationConst4]]
; CHECK: OpFunctionEnd
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
index 9469d24b20af26..54d0843cbf2340 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll
@@ -9,7 +9,7 @@
; CHECK-DAG: %[[#LongTy:]] = OpTypeInt 64 0
; CHECK-DAG: %[[#PtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#LongTy]]
; CHECK-DAG: %[[#IntTy:]] = OpTypeInt 32 0
-; CHECK-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 1
+; CHECK-DAG: %[[#Scope:]] = OpConstantNull %[[#IntTy]]
; CHECK-DAG: %[[#MemSem:]] = OpConstant %[[#IntTy]] 8
; CHECK-DAG: %[[#PtrPtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#PtrLongTy]]
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
index 8c5c036351d977..724cbc38cb883f 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll
@@ -18,16 +18,17 @@
; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]]
; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0
; CHECK-DAG: [[PtrI64Ty:%.*]] = OpTypePointer Generic [[I64Ty]]
-;; Device scope is encoded with constant 1
-; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
+;; AllSvmDevices scope is encoded with constant 0
+; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]]
+; CHECK-DAG: [[DEVICESCOPE:%.*]] = OpConstant [[I32Ty]] 1
;; "monotonic" maps to the relaxed memory semantics, encoded with constant 0
-; CHECK-DAG: [[RELAXED:%.*]] = OpConstantNull [[I32Ty]]
+;; No separate RELAXED constant: [[SCOPE]] (constant 0) is re-used for the relaxed semantics operand.
; CHECK: [[ADD]] = OpFunction [[I32Ty]]
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_add(i32* %ptr, i32 %val) {
@@ -39,7 +40,7 @@ define i32 @test_add(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_sub(i32* %ptr, i32 %val) {
@@ -51,7 +52,7 @@ define i32 @test_sub(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_min(i32* %ptr, i32 %val) {
@@ -63,7 +64,7 @@ define i32 @test_min(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_max(i32* %ptr, i32 %val) {
@@ -75,7 +76,7 @@ define i32 @test_max(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_umin(i32* %ptr, i32 %val) {
@@ -87,7 +88,7 @@ define i32 @test_umin(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_umax(i32* %ptr, i32 %val) {
@@ -99,7 +100,7 @@ define i32 @test_umax(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_and(i32* %ptr, i32 %val) {
@@ -111,7 +112,7 @@ define i32 @test_and(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_or(i32* %ptr, i32 %val) {
@@ -123,7 +124,7 @@ define i32 @test_or(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]]
; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]]
+; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]]
; CHECK-NEXT: OpReturnValue [[R]]
; CHECK-NEXT: OpFunctionEnd
define i32 @test_xor(i32* %ptr, i32 %val) {
@@ -135,10 +136,10 @@ define i32 @test_xor(i32* %ptr, i32 %val) {
; CHECK-NEXT: [[Arg1:%.*]] = OpFunctionParameter [[PtrI64Ty]]
; CHECK-NEXT: [[Arg2:%.*]] = OpFunctionParameter [[I64Ty]]
; CHECK-NEXT: OpLabel
-; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
-; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
-; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
-; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]]
+; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]]
+; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]]
+; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]]
+; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]]
; CHECK-NEXT: OpReturn
; CHECK-NEXT: OpFunctionEnd
define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %val) {
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll
index 07d1a5cf662eca..4d5aca6d404de6 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll
@@ -13,8 +13,8 @@
; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0
; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]]
-;; Device scope is encoded with constant 1
-; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
+;; AllSvmDevices scope is encoded with constant 0
+; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]]
;; "acq_rel" maps to the constant 8
; CHECK-DAG: [[ACQREL:%.*]] = OpConstant [[I32Ty]] 8
diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll
index 4078ffe1a10b87..9fd3d8e95b5f1e 100644
--- a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll
+++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll
@@ -13,8 +13,8 @@
; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0
; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]]
-;; Device scope is encoded with constant 1
-; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1
+;; AllSvmDevices scope is encoded with constant 0
+; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]]
;; "sequentially consistent" maps to constant 16
; CHECK-DAG: [[SEQ:%.*]] = OpConstant [[I32Ty]] 16
diff --git a/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll
new file mode 100644
index 00000000000000..ca8594a6b68ec0
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll
@@ -0,0 +1,163 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: %[[#Int:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#Scope_AllSvmDevices:]] = OpConstant %[[#Int]] 0
+; CHECK-DAG: %[[#Value:]] = OpConstant %[[#Int]] 42
+; CHECK-DAG: %[[#FPValue:]] = OpConstant %[[#Float]] 42
+; CHECK-DAG: %[[#Scope_Invocation:]] = OpConstant %[[#Int]] 4
+; CHECK-DAG: %[[#MemSem_SeqCst:]] = OpConstant %[[#Int]] 16
+; CHECK-DAG: %[[#Scope_Subgroup:]] = OpConstant %[[#Int]] 3
+; CHECK-DAG: %[[#Scope_Workgroup:]] = OpConstant %[[#Int]] 2
+; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1
+; CHECK-DAG: %[[#PointerType:]] = OpTypePointer CrossWorkgroup %[[#Int]]
+; CHECK-DAG: %[[#FPPointerType:]] = OpTypePointer CrossWorkgroup %[[#Float]]
+; CHECK-DAG: %[[#Pointer:]] = OpVariable %[[#PointerType]] CrossWorkgroup
+; CHECK-DAG: %[[#FPPointer:]] = OpVariable %[[#FPPointerType]] CrossWorkgroup
+
+ at ui = common dso_local addrspace(1) global i32 0, align 4
+ at f = common dso_local local_unnamed_addr addrspace(1) global float 0.000000e+00, align 4
+
+define dso_local spir_func void @test_singlethread_atomicrmw() local_unnamed_addr {
+entry:
+ %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]]
+ %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+
+ ret void
+}
+
+define dso_local spir_func void @test_subgroup_atomicrmw() local_unnamed_addr {
+entry:
+ %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]]
+ %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+
+ ret void
+}
+
+define dso_local spir_func void @test_workgroup_atomicrmw() local_unnamed_addr {
+entry:
+ %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]]
+ %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+
+ ret void
+}
+
+define dso_local spir_func void @test_device_atomicrmw() local_unnamed_addr {
+entry:
+ %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]]
+ %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+
+ ret void
+}
+
+define dso_local spir_func void @test_all_svm_devices_atomicrmw() local_unnamed_addr {
+entry:
+ %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]]
+ %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+ %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst
+ ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]]
+
+ ret void
+}
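
The OpConstant values the new test pins down are the SPIR-V Scope <id>
encodings; for reference (a sketch of the spec's numbering, not in-tree
code):

  #include <cstdint>

  // SPIR-V Scope <id> values checked by scoped_atomicrmw.ll.
  enum Scope : uint32_t {
    CrossDevice = 0, // syncscope("all_svm_devices") and the default scope
    Device = 1,      // syncscope("device")
    Workgroup = 2,   // syncscope("workgroup")
    Subgroup = 3,    // syncscope("subgroup")
    Invocation = 4,  // syncscope("singlethread")
  };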
From 25378a75752c134be0cef4999870819ba8ff8da1 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 28 Aug 2024 19:28:55 +0100
Subject: [PATCH 5/8] Fix formatting.
---
clang/lib/CodeGen/CGAtomic.cpp | 6 +++---
clang/lib/CodeGen/Targets/SPIR.cpp | 3 +--
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 16 ++++++----------
3 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index ba6ee4c0be3b7f..86f861c8d9a4e1 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -768,9 +768,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
if (!ScopeModel) {
llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");
if (CGF.getLangOpts().OpenCL)
- // OpenCL approach is: "The functions that do not have memory_scope argument
- // have the same semantics as the corresponding functions with the
- // memory_scope argument set to memory_scope_device." See ref.: //
+ // OpenCL approach is: "The functions that do not have memory_scope
+ // argument have the same semantics as the corresponding functions with
+ // the memory_scope argument set to memory_scope_device." See ref.:
// https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions
SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(),
SyncScope::OpenCLDevice,
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index a90741c0c0d324..f150953eeac252 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -193,8 +193,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
}
llvm::SyncScope::ID
-SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &,
- SyncScope Scope,
+SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
llvm::AtomicOrdering,
llvm::LLVMContext &Ctx) const {
std::string Name;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 8957a8b11e92a3..17b39842de6cb9 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -782,7 +782,6 @@ bool SPIRVInstructionSelector::selectBitcast(Register ResVReg,
return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast);
}
-
static void addMemoryOperands(MachineMemOperand *MemOp,
MachineInstrBuilder &MIB) {
uint32_t SpvMemOp = static_cast<uint32_t>(SPIRV::MemoryOperand::None);
@@ -935,9 +934,8 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg,
unsigned NegateOpcode) const {
assert(I.hasOneMemOperand());
const MachineMemOperand *MemOp = *I.memoperands_begin();
- uint32_t Scope =
- static_cast<uint32_t>(getMemScope(GR.CurMF->getFunction().getContext(),
- MemOp->getSyncScopeID()));
+ uint32_t Scope = static_cast<uint32_t>(getMemScope(
+ GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID()));
Register ScopeReg = buildI32Constant(Scope, I);
Register Ptr = I.getOperand(1).getReg();
@@ -1008,9 +1006,8 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const {
uint32_t MemSem = static_cast<uint32_t>(getMemSemantics(AO));
Register MemSemReg = buildI32Constant(MemSem, I);
SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm());
- uint32_t Scope =
- static_cast<uint32_t>(getMemScope(GR.CurMF->getFunction().getContext(),
- Ord));
+ uint32_t Scope = static_cast<uint32_t>(
+ getMemScope(GR.CurMF->getFunction().getContext(), Ord));
Register ScopeReg = buildI32Constant(Scope, I);
MachineBasicBlock &BB = *I.getParent();
return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier))
@@ -1029,9 +1026,8 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg,
if (!isa<GIntrinsic>(I)) {
assert(I.hasOneMemOperand());
const MachineMemOperand *MemOp = *I.memoperands_begin();
- unsigned Scope =
- static_cast<uint32_t>(getMemScope(GR.CurMF->getFunction().getContext(),
- MemOp->getSyncScopeID()));
+ unsigned Scope = static_cast<uint32_t>(getMemScope(
+ GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID()));
ScopeReg = buildI32Constant(Scope, I);
unsigned ScSem = static_cast<uint32_t>(
From 79acf40bb2f053ab41728cf5d4ab514346254c69 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 4 Sep 2024 13:52:40 +0100
Subject: [PATCH 6/8] Incorporate review feedback.
---
clang/lib/CodeGen/CGAtomic.cpp | 4 ++-
clang/lib/CodeGen/Targets/SPIR.cpp | 54 ++++++++++++++----------------
2 files changed, 28 insertions(+), 30 deletions(-)
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 86f861c8d9a4e1..45b01fdab4bfd3 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -766,7 +766,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
// LLVM atomic instructions always have synch scope. If clang atomic
// expression has no scope operand, use default LLVM synch scope.
if (!ScopeModel) {
- llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");
+ llvm::SyncScope::ID SS;
if (CGF.getLangOpts().OpenCL)
// OpenCL approach is: "The functions that do not have memory_scope
// argument have the same semantics as the corresponding functions with
@@ -775,6 +775,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(),
SyncScope::OpenCLDevice,
Order, CGF.getLLVMContext());
+ else
+ SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");
EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
Order, SS);
return;
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index f150953eeac252..f7ff78d394e6cc 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -63,6 +63,30 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const override;
};
+
+inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
+ switch (Scope) {
+ case SyncScope::HIPSingleThread:
+ case SyncScope::SingleScope:
+ return "singlethread";
+ case SyncScope::HIPWavefront:
+ case SyncScope::OpenCLSubGroup:
+ case SyncScope::WavefrontScope:
+ return "subgroup";
+ case SyncScope::HIPWorkgroup:
+ case SyncScope::OpenCLWorkGroup:
+ case SyncScope::WorkgroupScope:
+ return "workgroup";
+ case SyncScope::HIPAgent:
+ case SyncScope::OpenCLDevice:
+ case SyncScope::DeviceScope:
+ return "device";
+ case SyncScope::SystemScope:
+ case SyncScope::HIPSystem:
+ case SyncScope::OpenCLAllSVMDevices:
+ return "all_svm_devices";
+ }
+}
} // End anonymous namespace.
void CommonSPIRABIInfo::setCCs() {
@@ -196,35 +220,7 @@ llvm::SyncScope::ID
SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope,
llvm::AtomicOrdering,
llvm::LLVMContext &Ctx) const {
- std::string Name;
- switch (Scope) {
- case SyncScope::HIPSingleThread:
- case SyncScope::SingleScope:
- Name = "singlethread";
- break;
- case SyncScope::HIPWavefront:
- case SyncScope::OpenCLSubGroup:
- case SyncScope::WavefrontScope:
- Name = "subgroup";
- break;
- case SyncScope::HIPWorkgroup:
- case SyncScope::OpenCLWorkGroup:
- case SyncScope::WorkgroupScope:
- Name = "workgroup";
- break;
- case SyncScope::HIPAgent:
- case SyncScope::OpenCLDevice:
- case SyncScope::DeviceScope:
- Name = "device";
- break;
- case SyncScope::SystemScope:
- case SyncScope::HIPSystem:
- case SyncScope::OpenCLAllSVMDevices:
- Name = "all_svm_devices";
- break;
- }
-
- return Ctx.getOrInsertSyncScopeID(Name);
+ return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope));
}
/// Construct a SPIR-V target extension type for the given OpenCL image type.
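
To illustrate the intended flow after this refactoring (a sketch assuming the
names introduced above; getWorkgroupSSID is a made-up helper, and
mapClangSyncScopeToLLVM is file-local to SPIR.cpp): a HIP workgroup-scoped
atomic and an OpenCL work_group-scoped one both intern the same LLVM sync
scope string.

  llvm::SyncScope::ID getWorkgroupSSID(llvm::LLVMContext &Ctx) {
    // mapClangSyncScopeToLLVM(SyncScope::HIPWorkgroup) and
    // mapClangSyncScopeToLLVM(SyncScope::OpenCLWorkGroup) both yield
    // "workgroup", so they produce one and the same SyncScope::ID.
    return Ctx.getOrInsertSyncScopeID(
        mapClangSyncScopeToLLVM(SyncScope::HIPWorkgroup));
  }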
From e984939b1c5364b9bf7b09f2282c5503752bddf4 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 18 Sep 2024 20:13:59 +0100
Subject: [PATCH 7/8] No need for aliases / special handling of System scope.
---
clang/lib/CodeGen/CGAtomic.cpp | 2 +-
clang/lib/CodeGen/Targets/SPIR.cpp | 2 +-
clang/test/CodeGen/scoped-atomic-ops.c | 32 +++++++++++++-------------
3 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 45b01fdab4bfd3..a2a87e012b8b27 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -776,7 +776,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
SyncScope::OpenCLDevice,
Order, CGF.getLLVMContext());
else
- SS = CGF.getLLVMContext().getOrInsertSyncScopeID("");
+ SS = llvm::SyncScope::System;
EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
Order, SS);
return;
diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp
index f7ff78d394e6cc..764617acb8ba68 100644
--- a/clang/lib/CodeGen/Targets/SPIR.cpp
+++ b/clang/lib/CodeGen/Targets/SPIR.cpp
@@ -84,7 +84,7 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) {
case SyncScope::SystemScope:
case SyncScope::HIPSystem:
case SyncScope::OpenCLAllSVMDevices:
- return "all_svm_devices";
+ return "";
}
}
} // End anonymous namespace.
diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c
index 24f1613e8af4e8..cf98812a07e91d 100644
--- a/clang/test/CodeGen/scoped-atomic-ops.c
+++ b/clang/test/CodeGen/scoped-atomic-ops.c
@@ -11,7 +11,7 @@
// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4
// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4
// SPIRV: define hidden spir_func i32 @fi1a(
-// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4
// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4
// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4
// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4
@@ -33,7 +33,7 @@ int fi1a(int *i) {
// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
// SPIRV-LABEL: define hidden spir_func i32 @fi1b(
-// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4
// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
@@ -54,7 +54,7 @@ int fi1b(int *i) {
// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
// SPIRV-LABEL: define hidden spir_func void @fi2a(
-// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4
// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
@@ -75,7 +75,7 @@ void fi2a(int *i) {
// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4
// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4
// SPIRV-LABEL: define hidden spir_func void @fi2b(
-// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4
// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4
// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4
// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4
@@ -98,14 +98,14 @@ void fi2b(int *i) {
// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4
// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4
// SPIRV-LABEL: define hidden spir_func void @fi3a(
-// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4
-// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") monotonic, align 4
-// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("all_svm_devices") monotonic, align 4
-// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("all_svm_devices") monotonic, align 4
-// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("all_svm_devices") monotonic, align 4
-// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("all_svm_devices") monotonic, align 4
-// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("all_svm_devices") monotonic, align 4
-// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4
+// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] monotonic, align 4
+// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] monotonic, align 4
+// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] monotonic, align 4
+// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] monotonic, align 4
+// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] monotonic, align 4
+// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] monotonic, align 4
+// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] monotonic, align 4
void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
*a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
*b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -236,7 +236,7 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) {
// AMDGCN-LABEL: define hidden zeroext i1 @fi4a(
// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4a(
-// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4
+// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4
_Bool fi4a(int *i) {
int cmp = 0;
int desired = 1;
@@ -296,7 +296,7 @@ _Bool fi4e(int *i) {
// AMDGCN-LABEL: define hidden zeroext i1 @fi5a(
// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4
// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5a(
-// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4
+// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4
_Bool fi5a(int *i) {
int cmp = 0;
return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE,
@@ -348,7 +348,7 @@ _Bool fi5e(int *i) {
// AMDGCN-LABEL: define hidden i32 @fi6a(
// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4
// SPIRV-LABEL: define hidden spir_func i32 @fi6a(
-// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4
int fi6a(int *c, int *d) {
int ret;
__scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
@@ -398,7 +398,7 @@ int fi6e(int *c, int *d) {
// AMDGCN-LABEL: define hidden zeroext i1 @fi7a(
// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1
// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7a(
-// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 1
+// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] monotonic, align 1
_Bool fi7a(_Bool *c) {
return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED,
__MEMORY_SCOPE_SYSTEM);
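
For context, the fi*a functions exercised above compile from Clang's scoped
atomic builtins; after this patch the __MEMORY_SCOPE_SYSTEM variants carry no
syncscope in the emitted IR, i.e. LLVM's default (system) scope. A sketch of
the source shape (function names are illustrative; the builtins are also
usable from C++):

  int load_system(int *p) {
    return __scoped_atomic_load_n(p, __ATOMIC_RELAXED,
                                  __MEMORY_SCOPE_SYSTEM);
  }
  int add_workgroup(int *p) {
    return __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED,
                                     __MEMORY_SCOPE_WRKGRP);
  }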
From ced6877031ae7b0de11b7be147f4ea988caaf687 Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Wed, 18 Sep 2024 21:47:53 +0100
Subject: [PATCH 8/8] Remove & replace SyncScopeIDs struct.
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 24 ------------
llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 38 ++++++++++++-------
llvm/lib/Target/SPIRV/SPIRVUtils.h | 2 +-
3 files changed, 25 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index faa05baf5c2527..7c71a18a9bf81a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -33,28 +33,6 @@
#include "llvm/IR/IntrinsicsSPIRV.h"
#include "llvm/Support/Debug.h"
-namespace {
-// TODO: consider removing this altogether and merely inserting the scopes
-// directly in SetupMF.
-struct SyncScopeIDs {
- llvm::SyncScope::ID Work_ItemSSID;
- llvm::SyncScope::ID WorkGroupSSID;
- llvm::SyncScope::ID DeviceSSID;
- llvm::SyncScope::ID AllSVMDevicesSSID;
- llvm::SyncScope::ID SubGroupSSID;
-
- SyncScopeIDs() {}
- SyncScopeIDs(llvm::LLVMContext &Context) {
- Work_ItemSSID = Context.getOrInsertSyncScopeID("singlethread");
- WorkGroupSSID = Context.getOrInsertSyncScopeID("workgroup");
- DeviceSSID = Context.getOrInsertSyncScopeID("device");
- AllSVMDevicesSSID = Context.getOrInsertSyncScopeID("all_svm_devices");
- SubGroupSSID = Context.getOrInsertSyncScopeID("subgroup");
- }
-};
-
-} // namespace
-
#define DEBUG_TYPE "spirv-isel"
using namespace llvm;
@@ -77,7 +55,6 @@ class SPIRVInstructionSelector : public InstructionSelector {
const RegisterBankInfo &RBI;
SPIRVGlobalRegistry &GR;
MachineRegisterInfo *MRI;
- SyncScopeIDs SSIDs;
MachineFunction *HasVRegsReset = nullptr;
/// We need to keep track of the number we give to anonymous global values to
@@ -306,7 +283,6 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
CodeGenCoverage *CoverageInfo,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) {
- SSIDs = SyncScopeIDs(MF.getFunction().getContext());
MRI = &MF.getRegInfo();
GR.setCurrentFunc(MF);
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 15f577f0e1fc39..2b20960653f92e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -251,22 +251,32 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
llvm_unreachable(nullptr);
}
-SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID) {
- SmallVector<StringRef> SSNs;
- Ctx.getSyncScopeNames(SSNs);
-
- StringRef MemScope = SSNs[ID];
- if (MemScope.empty() || MemScope == "all_svm_devices")
+SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) {
+  // Named by
+  // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id.
+  // We don't need aliases for Invocation and CrossDevice, as we already have
+  // them covered by the "singlethread" and "" strings respectively (see the
+  // implementation of LLVMContext::LLVMContext()).
+  const llvm::SyncScope::ID SubGroupSSID =
+      Ctx.getOrInsertSyncScopeID("subgroup");
+  const llvm::SyncScope::ID WorkGroupSSID =
+      Ctx.getOrInsertSyncScopeID("workgroup");
+  const llvm::SyncScope::ID DeviceSSID =
+      Ctx.getOrInsertSyncScopeID("device");
+
+ if (Id == llvm::SyncScope::SingleThread)
+ return SPIRV::Scope::Invocation;
+ else if (Id == llvm::SyncScope::System)
return SPIRV::Scope::CrossDevice;
- if (MemScope == "device")
- return SPIRV::Scope::Device;
- if (MemScope == "workgroup")
- return SPIRV::Scope::Workgroup;
- if (MemScope == "subgroup")
+  else if (Id == SubGroupSSID)
return SPIRV::Scope::Subgroup;
- if (MemScope == "singlethread")
- return SPIRV::Scope::Invocation;
- return SPIRV::Scope::Device; // Follow OpenCL convention for now.
+  else if (Id == WorkGroupSSID)
+ return SPIRV::Scope::Workgroup;
+  else if (Id == DeviceSSID)
+ return SPIRV::Scope::Device;
+ return SPIRV::Scope::CrossDevice;
}
MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
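
The rewrite above can special-case SingleThread and System without consulting
the name table because those two IDs are fixed by LLVMContext's constructor;
a one-line check of that invariant (sketch, not in-tree code):

  #include "llvm/IR/LLVMContext.h"

  // "singlethread" and "" are pre-registered as IDs 0 and 1 respectively.
  static_assert(llvm::SyncScope::SingleThread == 0, "registered first");
  static_assert(llvm::SyncScope::System == 1, "registered second");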
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h
index cad94fb36ee49a..3a291cf9f6d54f 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.h
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h
@@ -75,7 +75,7 @@ getMemSemanticsForStorageClass(SPIRV::StorageClass::StorageClass SC);
SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord);
-SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID);
+SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id);
// Find def instruction for the given ConstReg, walking through
// spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def