[llvm] b920407 - [LICM] Disable thread-safety checks in single-thread model
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 10 07:51:29 PDT 2022
Author: Shubham Narlawar
Date: 2022-10-10T16:51:16+02:00
New Revision: b920407cf5957dfd67acece563560141838fc8d8
URL: https://github.com/llvm/llvm-project/commit/b920407cf5957dfd67acece563560141838fc8d8
DIFF: https://github.com/llvm/llvm-project/commit/b920407cf5957dfd67acece563560141838fc8d8.diff
LOG: [LICM] Disable thread-safety checks in single-thread model
If the single-thread model is used, or the
-licm-force-thread-model-single flag is specified, skip checks
related to thread-safety. This means that store promotion for
conditionally executed stores only requires proof of
dereferenceability and writability, but not of thread-safety. For
example, this enables promotion of stores to (non-constant) globals,
as well as captured allocas.
Fixes https://github.com/llvm/llvm-project/issues/50537.
Differential Revision: https://reviews.llvm.org/D130466
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Transforms/Scalar/LICM.cpp
llvm/test/Transforms/LICM/promote-single-thread.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index e52381abd142c..a25061f06c148 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -372,6 +372,8 @@ class TargetTransformInfo {
unsigned getAssumedAddrSpace(const Value *V) const;
+ bool isSingleThreaded() const;
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const;
@@ -1581,6 +1583,7 @@ class TargetTransformInfo::Concept {
virtual bool
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
+ virtual bool isSingleThreaded() const = 0;
virtual std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
@@ -1959,6 +1962,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getAssumedAddrSpace(V);
}
+ bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override {
return Impl.getPredicatedAddrSpace(V);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 8342a82197ea8..36e363ad9cb73 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -108,6 +108,8 @@ class TargetTransformInfoImplBase {
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+ bool isSingleThreaded() const { return false; }
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const {
return std::make_pair(nullptr, -1);
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 6bdc620fc18f6..9fa5552466ce9 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -47,6 +47,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -287,6 +288,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
}
+ bool isSingleThreaded() const {
+ return getTLI()->getTargetMachine().Options.ThreadModel ==
+ ThreadModel::Single;
+ }
+
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const {
return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index afe70607d430e..efdd831cd585f 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -210,8 +210,9 @@ bool promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
- const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
- OptimizationRemarkEmitter *, bool AllowSpeculation);
+ const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+ MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
+ bool AllowSpeculation);
/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 9dd8fddda27ce..0d3058512e05e 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -273,6 +273,10 @@ unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
return TTIImpl->getAssumedAddrSpace(V);
}
+bool TargetTransformInfo::isSingleThreaded() const {
+ return TTIImpl->isSingleThreaded();
+}
+
std::pair<const Value *, unsigned>
TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
return TTIImpl->getPredicatedAddrSpace(V);
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 5aa63ce5d2165..edd7017941af9 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -76,6 +76,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -112,6 +113,10 @@ static cl::opt<bool> ControlFlowHoisting(
"licm-control-flow-hoisting", cl::Hidden, cl::init(false),
cl::desc("Enable control flow (and PHI) hoisting in LICM"));
+static cl::opt<bool>
+ SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
+ cl::desc("Force thread model single in LICM pass"));
+
static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "
@@ -489,7 +494,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
collectPromotionCandidates(MSSA, AA, L)) {
LocalPromoted |= promoteLoopAccessesToScalars(
PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
- DT, AC, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
+ DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
+ LicmAllowSpeculation);
}
Promoted |= LocalPromoted;
} while (LocalPromoted);
@@ -1911,17 +1917,21 @@ bool isWritableObject(const Value *Object) {
if (auto *A = dyn_cast<Argument>(Object))
return A->hasByValAttr();
+ if (auto *G = dyn_cast<GlobalVariable>(Object))
+ return !G->isConstant();
+
// TODO: Noalias has nothing to do with writability, this should check for
// an allocator function.
return isNoAliasCall(Object);
}
-bool isThreadLocalObject(const Value *Object, const Loop *L,
- DominatorTree *DT) {
+bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
+ TargetTransformInfo *TTI) {
// The object must be function-local to start with, and then not captured
// before/in the loop.
- return isIdentifiedFunctionLocal(Object) &&
- isNotCapturedBeforeOrInLoop(Object, L, DT);
+ return (isIdentifiedFunctionLocal(Object) &&
+ isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
+ (TTI->isSingleThreaded() || SingleThread);
}
} // namespace
@@ -1937,9 +1947,9 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
- const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU,
- ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE,
- bool AllowSpeculation) {
+ const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
+ MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -2150,7 +2160,8 @@ bool llvm::promoteLoopAccessesToScalars(
// violating the memory model.
if (StoreSafety == StoreSafetyUnknown) {
Value *Object = getUnderlyingObject(SomePtr);
- if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT))
+ if (isWritableObject(Object) &&
+ isThreadLocalObject(Object, CurLoop, DT, TTI))
StoreSafety = StoreSafe;
}
diff --git a/llvm/test/Transforms/LICM/promote-single-thread.ll b/llvm/test/Transforms/LICM/promote-single-thread.ll
index b0127344a523a..0f32a4e751cea 100644
--- a/llvm/test/Transforms/LICM/promote-single-thread.ll
+++ b/llvm/test/Transforms/LICM/promote-single-thread.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefixes=CHECK,MT
+; RUN: opt -S -licm -licm-force-thread-model-single < %s | FileCheck %s --check-prefixes=CHECK,ST
@g = external global i32
@c = external constant i32
@@ -10,22 +11,40 @@ declare void @capture(ptr)
; mode only loads can be promoted, as a different thread might write to the
; global.
define void @promote_global(i1 %c, i1 %c2) {
-; CHECK-LABEL: @promote_global(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
-; CHECK: if:
-; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
-; CHECK-NEXT: store i32 [[V_INC]], ptr @g, align 4
-; CHECK-NEXT: br label [[LATCH]]
-; CHECK: latch:
-; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; MT-LABEL: @promote_global(
+; MT-NEXT: entry:
+; MT-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
+; MT-NEXT: br label [[LOOP:%.*]]
+; MT: loop:
+; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
+; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; MT: if:
+; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; MT-NEXT: store i32 [[V_INC]], ptr @g, align 4
+; MT-NEXT: br label [[LATCH]]
+; MT: latch:
+; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; MT: exit:
+; MT-NEXT: ret void
+;
+; ST-LABEL: @promote_global(
+; ST-NEXT: entry:
+; ST-NEXT: [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
+; ST-NEXT: br label [[LOOP:%.*]]
+; ST: loop:
+; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
+; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; ST: if:
+; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; ST-NEXT: br label [[LATCH]]
+; ST: latch:
+; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; ST: exit:
+; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
+; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr @g, align 4
+; ST-NEXT: ret void
;
entry:
br label %loop
@@ -87,24 +106,44 @@ exit:
; mode only loads can be promoted, as a different thread might write to the
; captured alloca.
define void @promote_captured_alloca(i1 %c, i1 %c2) {
-; CHECK-LABEL: @promote_captured_alloca(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @capture(ptr [[A]])
-; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
-; CHECK: if:
-; CHECK-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
-; CHECK-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
-; CHECK-NEXT: br label [[LATCH]]
-; CHECK: latch:
-; CHECK-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK: exit:
-; CHECK-NEXT: ret void
+; MT-LABEL: @promote_captured_alloca(
+; MT-NEXT: entry:
+; MT-NEXT: [[A:%.*]] = alloca i32, align 4
+; MT-NEXT: call void @capture(ptr [[A]])
+; MT-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
+; MT-NEXT: br label [[LOOP:%.*]]
+; MT: loop:
+; MT-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
+; MT-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; MT: if:
+; MT-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; MT-NEXT: store i32 [[V_INC]], ptr [[A]], align 4
+; MT-NEXT: br label [[LATCH]]
+; MT: latch:
+; MT-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; MT-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; MT: exit:
+; MT-NEXT: ret void
+;
+; ST-LABEL: @promote_captured_alloca(
+; ST-NEXT: entry:
+; ST-NEXT: [[A:%.*]] = alloca i32, align 4
+; ST-NEXT: call void @capture(ptr [[A]])
+; ST-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
+; ST-NEXT: br label [[LOOP:%.*]]
+; ST: loop:
+; ST-NEXT: [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
+; ST-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; ST: if:
+; ST-NEXT: [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; ST-NEXT: br label [[LATCH]]
+; ST: latch:
+; ST-NEXT: [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; ST-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; ST: exit:
+; ST-NEXT: [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
+; ST-NEXT: store i32 [[V_INC1_LCSSA]], ptr [[A]], align 4
+; ST-NEXT: ret void
;
entry:
%a = alloca i32
More information about the llvm-commits
mailing list