[clang] clang/AMDGPU: Use atomicrmw for ds fmin/fmax builtins (PR #96738)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 27 06:29:17 PDT 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96738
>From d16cc8ec8b9ad4780fcaa14a035193ee930cd8fe Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 11 Jun 2024 10:40:27 +0200
Subject: [PATCH 1/2] clang/AMDGPU: Use atomicrmw for ds fmin/fmax builtins
---
clang/lib/CodeGen/CGBuiltin.cpp | 40 ++++-------
clang/test/CodeGenCUDA/builtins-amdgcn.cu | 8 +--
.../test/CodeGenCUDA/builtins-spirv-amdgcn.cu | 8 +--
.../test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 66 ++++++++++++++++++-
4 files changed, 86 insertions(+), 36 deletions(-)
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 96dcf6283f9f8..98c2f70664ec7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18632,28 +18632,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
return Builder.CreateCall(F, { Src0, Builder.getFalse() });
}
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
- Intrinsic::ID Intrin;
- switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_ds_fminf:
- Intrin = Intrinsic::amdgcn_ds_fmin;
- break;
- case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
- Intrin = Intrinsic::amdgcn_ds_fmax;
- break;
- }
- llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
- llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
- llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
- llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
- llvm::Value *Src4 = EmitScalarExpr(E->getArg(4));
- llvm::Function *F = CGM.getIntrinsic(Intrin, { Src1->getType() });
- llvm::FunctionType *FTy = F->getFunctionType();
- llvm::Type *PTy = FTy->getParamType(0);
- Src0 = Builder.CreatePointerBitCastOrAddrSpaceCast(Src0, PTy);
- return Builder.CreateCall(F, { Src0, Src1, Src2, Src3, Src4 });
- }
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:
case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
@@ -19087,11 +19065,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
- case AMDGPU::BI__builtin_amdgcn_ds_faddf:
case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
- case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: {
+ case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
+ case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
llvm::AtomicRMWInst::BinOp BinOp;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19109,6 +19089,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
BinOp = llvm::AtomicRMWInst::FAdd;
break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fminf:
+ BinOp = llvm::AtomicRMWInst::FMin;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:
+ BinOp = llvm::AtomicRMWInst::FMax;
+ break;
}
Address Ptr = CheckAtomicAlignment(*this, E);
@@ -19118,8 +19104,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
bool Volatile;
- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf) {
- // __builtin_amdgcn_ds_faddf has an explicit volatile argument
+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||
+ BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {
+ // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument
Volatile =
cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
} else {
diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
index 132cbd27b08fc..2e88afac813f4 100644
--- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -98,7 +98,7 @@ __global__
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr
// CHECK-NEXT: store float [[SRC:%.*]], ptr [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fmax ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP1]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
@@ -142,7 +142,7 @@ __global__ void test_ds_fadd(float src) {
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
@@ -245,10 +245,10 @@ __device__ void func(float *x);
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[TMP1]] to ptr addrspace(3)
// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP4]], ptr [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP5]]) #[[ATTR8:[0-9]+]]
+// CHECK-NEXT: call void @_Z4funcPf(ptr noundef [[TMP5]]) #[[ATTR7:[0-9]+]]
// CHECK-NEXT: ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
diff --git a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
index 7bb756a4a2731..32851805298f1 100644
--- a/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu
@@ -95,7 +95,7 @@ __global__
// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4)
// CHECK-NEXT: store float [[SRC:%.*]], ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = call contract addrspace(4) float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fmax ptr addrspace(3) @_ZZ12test_ds_fmaxfE6shared, float [[TMP0]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
@@ -139,7 +139,7 @@ __global__ void test_ds_fadd(float src) {
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(3)
// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = call contract addrspace(4) float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP4]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: ret void
//
@@ -236,10 +236,10 @@ __device__ void func(float *x);
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(3)
// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[SRC_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = call contract addrspace(4) float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) [[TMP2]], float [[TMP3]], i32 0, i32 0, i1 false)
+// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw fmin ptr addrspace(3) [[TMP2]], float [[TMP3]] monotonic, align 4
// CHECK-NEXT: store volatile float [[TMP4]], ptr addrspace(4) [[X_ASCAST]], align 4
// CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[SHARED_ADDR_ASCAST]], align 8
-// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP5]]) #[[ATTR7:[0-9]+]]
+// CHECK-NEXT: call spir_func addrspace(4) void @_Z4funcPf(ptr addrspace(4) noundef [[TMP5]]) #[[ATTR6:[0-9]+]]
// CHECK-NEXT: ret void
//
__global__ void test_ds_fmin_func(float src, float *__restrict shared) {
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 46af87f5e1d68..4e3b1e14ead62 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -158,23 +158,85 @@ void test_ds_faddf(local float *out, float src) {
}
// CHECK-LABEL: @test_ds_fmin
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
+// CHECK: atomicrmw volatile fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
+
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acquire, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src release, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
+
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmin ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
+
#if !defined(__SPIRV__)
void test_ds_fminf(local float *out, float src) {
#else
void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
#endif
*out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, true);
+
+ // Test all orders.
+ *out = __builtin_amdgcn_ds_fminf(out, src, 1, 0, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 2, 0, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 3, 0, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 4, 0, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 5, 0, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 5, 0, false); // invalid
+
+ // Test all syncscopes.
+ *out = __builtin_amdgcn_ds_fminf(out, src, 0, 1, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 0, 2, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 0, 3, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 0, 4, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, 0, 5, false); // invalid
}
// CHECK-LABEL: @test_ds_fmax
-// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
+// CHECK: atomicrmw volatile fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
+
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acquire, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src release, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src acq_rel, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src seq_cst, align 4{{$}}
+
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("agent") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("workgroup") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("wavefront") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src syncscope("singlethread") monotonic, align 4{{$}}
+// CHECK: atomicrmw fmax ptr addrspace(3) %out, float %src monotonic, align 4{{$}}
+
#if !defined(__SPIRV__)
void test_ds_fmaxf(local float *out, float src) {
#else
void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
#endif
*out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, true);
+
+ // Test all orders.
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 1, 0, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 2, 0, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 3, 0, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 4, 0, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 5, 0, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 5, 0, false); // invalid
+
+ // Test all syncscopes.
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 1, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 2, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 3, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 4, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 5, false); // invalid
}
// CHECK-LABEL: @test_s_memtime
>From ff880d533839b6623580855182da56b8e454fe53 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 26 Jun 2024 18:11:06 +0200
Subject: [PATCH 2/2] Use order and scope macros in test
---
.../test/CodeGenOpenCL/builtins-amdgcn-vi.cl | 44 +++++++++----------
1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
index 4e3b1e14ead62..5bd8f77a5930c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
@@ -183,19 +183,19 @@ void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
*out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, true);
// Test all orders.
- *out = __builtin_amdgcn_ds_fminf(out, src, 1, 0, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 2, 0, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 3, 0, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 4, 0, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 5, 0, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 5, 0, false); // invalid
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid
// Test all syncscopes.
- *out = __builtin_amdgcn_ds_fminf(out, src, 0, 1, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 0, 2, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 0, 3, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 0, 4, false);
- *out = __builtin_amdgcn_ds_fminf(out, src, 0, 5, false); // invalid
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
+ *out = __builtin_amdgcn_ds_fminf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}
// CHECK-LABEL: @test_ds_fmax
@@ -224,19 +224,19 @@ void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
*out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, true);
// Test all orders.
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 1, 0, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 2, 0, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 3, 0, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 4, 0, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 5, 0, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 5, 0, false); // invalid
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_CONSUME, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELEASE, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_ACQ_REL, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_SYSTEM, false); // invalid
// Test all syncscopes.
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 1, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 2, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 3, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 4, false);
- *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 5, false); // invalid
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE, false);
+ *out = __builtin_amdgcn_ds_fmaxf(out, src, __ATOMIC_RELAXED, 5, false); // invalid
}
// CHECK-LABEL: @test_s_memtime
More information about the cfe-commits
mailing list