[clang] f22ba51 - [Remarks] Emit optimization remarks for atomics generating CAS loop
Anshil Gandhi via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 16 13:56:50 PDT 2021
Author: Anshil Gandhi
Date: 2021-08-16T14:56:01-06:00
New Revision: f22ba51873509b93732015176b778465f40c6db5
URL: https://github.com/llvm/llvm-project/commit/f22ba51873509b93732015176b778465f40c6db5
DIFF: https://github.com/llvm/llvm-project/commit/f22ba51873509b93732015176b778465f40c6db5.diff
LOG: [Remarks] Emit optimization remarks for atomics generating CAS loop
Implements ORE in AtomicExpand pass to report atomics generating a
compare and swap loop.
Differential Revision: https://reviews.llvm.org/D106891
Added:
clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu
clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
Modified:
llvm/lib/CodeGen/AtomicExpandPass.cpp
Removed:
################################################################################
diff --git a/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu b/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu
new file mode 100644
index 0000000000000..96892286fd75e
--- /dev/null
+++ b/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -target-cpu gfx90a -Rpass=atomic-expand -S -o - 2>&1 | \
+// RUN: FileCheck %s --check-prefix=GFX90A-CAS
+
+// REQUIRES: amdgpu-registered-target
+
+#include "Inputs/cuda.h"
+#include <stdatomic.h>
+
+// GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at system memory scope
+// GFX90A-CAS-LABEL: _Z14atomic_add_casPf
+// GFX90A-CAS: flat_atomic_cmpswap v0, v[2:3], v[4:5] glc
+// GFX90A-CAS: s_cbranch_execnz
+__device__ float atomic_add_cas(float *p) {
+ return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
+}
diff --git a/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
new file mode 100644
index 0000000000000..127866e84e051
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN: -Rpass=atomic-expand -S -o - 2>&1 | \
+// RUN: FileCheck %s --check-prefix=REMARK
+
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \
+// RUN: -Rpass=atomic-expand -S -emit-llvm -o - 2>&1 | \
+// RUN: FileCheck %s --check-prefix=GFX90A-CAS
+
+// REQUIRES: amdgpu-registered-target
+
+typedef enum memory_order {
+ memory_order_relaxed = __ATOMIC_RELAXED,
+ memory_order_acquire = __ATOMIC_ACQUIRE,
+ memory_order_release = __ATOMIC_RELEASE,
+ memory_order_acq_rel = __ATOMIC_ACQ_REL,
+ memory_order_seq_cst = __ATOMIC_SEQ_CST
+} memory_order;
+
+typedef enum memory_scope {
+ memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+ memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+ memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+ memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
+ memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
+} memory_scope;
+
+// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope [-Rpass=atomic-expand]
+// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope [-Rpass=atomic-expand]
+// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope [-Rpass=atomic-expand]
+// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at wavefront-one-as memory scope [-Rpass=atomic-expand]
+// GFX90A-CAS-LABEL: @atomic_cas
+// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
+// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("agent-one-as") monotonic
+// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("one-as") monotonic
+// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("wavefront-one-as") monotonic
+float atomic_cas(__global atomic_float *d, float a) {
+ float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
+ float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device);
+ float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_all_svm_devices);
+ float ret4 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_sub_group);
+}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 125a3be585cb5..a27d43e43a855 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -570,7 +571,9 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ LLVMContext &Ctx = AI->getModule()->getContext();
+ TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
+ switch (Kind) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC: {
@@ -600,6 +603,18 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[AI->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[AI->getSyncScopeID()];
+ OptimizationRemarkEmitter ORE(AI->getFunction());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Passed", AI->getFunction())
+ << "A compare and swap loop was generated for an atomic "
+ << AI->getOperationName(AI->getOperation()) << " operation at "
+ << MemScope << " memory scope";
+ });
expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
}
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
new file mode 100644
index 0000000000000..240963cfe9009
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
@@ -0,0 +1,103 @@
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs --pass-remarks=atomic-expand \
+; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-CAS
+
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at system memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at agent memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at workgroup memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at wavefront memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at singlethread memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at wavefront-one-as memory scope
+; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at singlethread-one-as memory scope
+
+; GFX90A-CAS-LABEL: atomic_add_cas:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_agent:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_agent(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("agent") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_workgroup:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_workgroup(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_wavefront:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_wavefront(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_singlethread:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_singlethread(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("singlethread") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_agent_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_agent_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("agent-one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_workgroup_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_workgroup_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup-one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_wavefront_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_wavefront_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront-one-as") monotonic, align 4
+ ret void
+}
+
+; GFX90A-CAS-LABEL: atomic_add_cas_singlethread_one_as:
+; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
+; GFX90A-CAS: s_cbranch_execnz
+define dso_local void @atomic_add_cas_singlethread_one_as(float* %p, float %q) {
+entry:
+ %ret = atomicrmw fadd float* %p, float %q syncscope("singlethread-one-as") monotonic, align 4
+ ret void
+}
More information about the cfe-commits
mailing list