[Openmp-commits] [openmp] cb17c48 - [Attributor] Identify and remove no-op fences
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jun 5 17:14:13 PDT 2023
Author: Johannes Doerfert
Date: 2023-06-05T17:14:00-07:00
New Revision: cb17c48fdd8c0f8ab6cd7ad3f197b052db20d1f6
URL: https://github.com/llvm/llvm-project/commit/cb17c48fdd8c0f8ab6cd7ad3f197b052db20d1f6
DIFF: https://github.com/llvm/llvm-project/commit/cb17c48fdd8c0f8ab6cd7ad3f197b052db20d1f6.diff
LOG: [Attributor] Identify and remove no-op fences
The logic and implementation follows the removal of no-op barriers. If
the fence is not making updates visible, either to the world or the
current thread, it is not needed. Said differently, the fences we remove
do not establish synchronization (happens-before) edges.
This allows us to eliminate some of the regression caused by:
https://reviews.llvm.org/D145290
Added:
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
llvm/test/Transforms/OpenMP/barrier_removal.ll
openmp/libomptarget/test/jit/empty_kernel_lvl2.c
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 4aec1f51b6323..90ed050dde9b7 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -119,6 +119,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
@@ -5178,6 +5179,10 @@ struct AAExecutionDomain
getExecutionDomain(const CallBase &CB) const = 0;
virtual ExecutionDomainTy getFunctionExecutionDomain() const = 0;
+ /// Helper function to determine if \p FI is a no-op given the information
+ /// about its execution from \p ExecDomainAA.
+ virtual bool isNoOpFence(const FenceInst &FI) const = 0;
+
/// This function should return true if the type of the \p AA is
/// AAExecutionDomain.
static bool classof(const AbstractAttribute *AA) {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b36cb2e88b227..2a7dcf883ea01 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -4339,13 +4339,22 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
if (!isAssumedSideEffectFree(A, I)) {
- if (!isa_and_nonnull<StoreInst>(I))
+ if (!isa_and_nonnull<StoreInst>(I) && !isa_and_nonnull<FenceInst>(I))
indicatePessimisticFixpoint();
else
removeAssumedBits(HAS_NO_EFFECT);
}
}
+ bool isDeadFence(Attributor &A, FenceInst &FI) {
+ const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(*FI.getFunction()), *this, DepClassTy::NONE);
+ if (!ExecDomainAA || !ExecDomainAA->isNoOpFence(FI))
+ return false;
+ A.recordDependence(*ExecDomainAA, *this, DepClassTy::OPTIONAL);
+ return true;
+ }
+
bool isDeadStore(Attributor &A, StoreInst &SI,
SmallSetVector<Instruction *, 8> *AssumeOnlyInst = nullptr) {
// Lang ref now states volatile store is not UB/dead, let's skip them.
@@ -4399,6 +4408,9 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
if (isa_and_nonnull<StoreInst>(I))
if (isValidState())
return "assumed-dead-store";
+ if (isa_and_nonnull<FenceInst>(I))
+ if (isValidState())
+ return "assumed-dead-fence";
return AAIsDeadValueImpl::getAsStr();
}
@@ -4408,6 +4420,9 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
if (!isDeadStore(A, *SI))
return indicatePessimisticFixpoint();
+ } else if (auto *FI = dyn_cast_or_null<FenceInst>(I)) {
+ if (!isDeadFence(A, *FI))
+ return indicatePessimisticFixpoint();
} else {
if (!isAssumedSideEffectFree(A, I))
return indicatePessimisticFixpoint();
@@ -4443,6 +4458,11 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
}
return ChangeStatus::CHANGED;
}
+ if (auto *FI = dyn_cast<FenceInst>(I)) {
+ assert(isDeadFence(A, *FI));
+ A.deleteAfterManifest(*FI);
+ return ChangeStatus::CHANGED;
+ }
if (isAssumedSideEffectFree(A, I) && !isa<InvokeInst>(I)) {
A.deleteAfterManifest(*I);
return ChangeStatus::CHANGED;
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 1e86167d570c0..d23efb8193827 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -46,6 +47,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO/Attributor.h"
@@ -2641,6 +2643,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
return Changed;
}
+ bool isNoOpFence(const FenceInst &FI) const override {
+ return getState().isValidState() && !NonNoOpFences.count(&FI);
+ }
+
/// Merge barrier and assumption information from \p PredED into the successor
/// \p ED.
void
@@ -2811,6 +2817,10 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
R = V;
return !Eq;
}
+
+ /// Collection of fences known to be non-no-opt. All fences not in this set
+ /// can be assumed no-opt.
+ SmallPtrSet<const FenceInst *, 8> NonNoOpFences;
};
void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
@@ -2980,6 +2990,33 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
continue;
}
+ if (auto *FI = dyn_cast<FenceInst>(&I)) {
+ if (!ED.EncounteredNonLocalSideEffect) {
+ // An aligned fence without non-local side-effects is a no-op.
+ if (ED.IsReachedFromAlignedBarrierOnly)
+ continue;
+ // A non-aligned fence without non-local side-effects is a no-op
+ // if the ordering only publishes non-local side-effects (or less).
+ switch (FI->getOrdering()) {
+ case AtomicOrdering::NotAtomic:
+ continue;
+ case AtomicOrdering::Unordered:
+ continue;
+ case AtomicOrdering::Monotonic:
+ continue;
+ case AtomicOrdering::Acquire:
+ break;
+ case AtomicOrdering::Release:
+ continue;
+ case AtomicOrdering::AcquireRelease:
+ break;
+ case AtomicOrdering::SequentiallyConsistent:
+ break;
+ };
+ }
+ NonNoOpFences.insert(FI);
+ }
+
auto *CB = dyn_cast<CallBase>(&I);
bool IsNoSync = AA::isNoSyncInst(A, I, *this);
bool IsAlignedBarrier =
@@ -5206,6 +5243,10 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
continue;
}
+ if (auto *FI = dyn_cast<FenceInst>(&I)) {
+ A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*FI));
+ continue;
+ }
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::assume) {
A.getOrCreateAAFor<AAPotentialValues>(
diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll
index 8ac82f0975d0f..9b092ba408bde 100644
--- a/llvm/test/Transforms/OpenMP/barrier_removal.ll
+++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll
@@ -111,6 +111,7 @@ define void @pos_empty_8(i1 %c) "kernel" {
;
br i1 %c, label %t, label %f
t:
+ fence release
call void @llvm.amdgcn.s.barrier() "llvm.assume"="ompx_aligned_barrier"
br label %f
f:
@@ -133,23 +134,33 @@ define void @neg_empty_9(i1 %c) "kernel" {
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
; CHECK: t:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: fence release
; CHECK-NEXT: br label [[M:%.*]]
; CHECK: f:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: fence release
; CHECK-NEXT: br label [[M]]
; CHECK: m:
+; CHECK-NEXT: fence release
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
+; CHECK-NEXT: fence release
; CHECK-NEXT: ret void
;
br i1 %c, label %t, label %f
t:
+ fence release
call void @llvm.amdgcn.s.barrier()
+ fence release
br label %m
f:
+ fence release
call void @llvm.amdgcn.s.barrier()
+ fence release
br label %m
m:
+ fence release
call void @llvm.amdgcn.s.barrier()
+ fence release
ret void
}
; FIXME: We should remove the barrier
@@ -345,21 +356,27 @@ define void @neg_mem() "kernel" {
; CHECK-SAME: () #[[ATTR4]] {
; CHECK-NEXT: [[ARG:%.*]] = load ptr, ptr @GPtr, align 8
; CHECK-NEXT: [[A:%.*]] = load i32, ptr @G1, align 4
+; CHECK-NEXT: fence seq_cst
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: store i32 [[A]], ptr [[ARG]], align 4
+; CHECK-NEXT: fence release
; CHECK-NEXT: call void @aligned_barrier()
; CHECK-NEXT: [[B:%.*]] = load i32, ptr addrspacecast (ptr addrspace(1) @G2 to ptr), align 4
; CHECK-NEXT: store i32 [[B]], ptr @G1, align 4
+; CHECK-NEXT: fence acquire
; CHECK-NEXT: ret void
;
%arg = load ptr, ptr @GPtr
%a = load i32, ptr @G1
+ fence seq_cst
call void @aligned_barrier()
store i32 %a, ptr %arg
+ fence release
call void @aligned_barrier()
%G2c = addrspacecast ptr addrspace(1) @G2 to ptr
%b = load i32, ptr %G2c
store i32 %b, ptr @G1
+ fence acquire
call void @aligned_barrier()
ret void
}
@@ -397,27 +414,43 @@ define void @multiple_blocks_kernel_1(i1 %c0, i1 %c1) "kernel" {
; CHECK: m:
; CHECK-NEXT: ret void
;
+ fence acquire
call void @llvm.nvvm.barrier0()
+ fence release
call void @aligned_barrier()
+ fence seq_cst
br i1 %c0, label %t0, label %f0
t0:
+ fence seq_cst
call void @aligned_barrier()
+ fence seq_cst
br label %t0b
t0b:
+ fence seq_cst
call void @aligned_barrier()
+ fence seq_cst
br label %m
f0:
+ fence release
call void @aligned_barrier()
+ fence acquire
call void @llvm.nvvm.barrier0()
+ fence acquire
br i1 %c1, label %t1, label %f1
t1:
+ fence acquire
call void @aligned_barrier()
+ fence seq_cst
br label %m
f1:
+ fence seq_cst
call void @aligned_barrier()
+ fence acquire
br label %m
m:
+ fence seq_cst
call void @aligned_barrier()
+ fence seq_cst
ret void
}
diff --git a/openmp/libomptarget/test/jit/empty_kernel_lvl2.c b/openmp/libomptarget/test/jit/empty_kernel_lvl2.c
index 58181043e1bab..01270cf041de1 100644
--- a/openmp/libomptarget/test/jit/empty_kernel_lvl2.c
+++ b/openmp/libomptarget/test/jit/empty_kernel_lvl2.c
@@ -5,8 +5,7 @@
// RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \
// RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \
// RUN: %libomptarget-run-generic
-// TODO:
-// RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefix=FIRST
+// RUN: %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefix=FIRST
// RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
// RUN: -DTGT1_DIRECTIVE="target" \
// RUN: -DTGT2_DIRECTIVE="target" \
@@ -14,8 +13,7 @@
// RUN: env LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE=%t.pre.ll \
// RUN: LIBOMPTARGET_JIT_SKIP_OPT=true \
// RUN: %libomptarget-run-generic
-// TODO:
-// RUN: not %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND
+// RUN: %fcheck-plain-generic --input-file %t.pre.ll %S/empty_kernel.inc --check-prefixes=FIRST,SECOND
//
// RUN: %libomptarget-compileoptxx-generic -fopenmp-target-jit \
// RUN: -DTGT1_DIRECTIVE="target" \
More information about the Openmp-commits
mailing list