[llvm] b920407 - [LICM] Disable thread-safety checks in single-thread model

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 10 07:51:29 PDT 2022


Author: Shubham Narlawar
Date: 2022-10-10T16:51:16+02:00
New Revision: b920407cf5957dfd67acece563560141838fc8d8

URL: https://github.com/llvm/llvm-project/commit/b920407cf5957dfd67acece563560141838fc8d8
DIFF: https://github.com/llvm/llvm-project/commit/b920407cf5957dfd67acece563560141838fc8d8.diff

LOG: [LICM] Disable thread-safety checks in single-thread model

If the single-thread model is used, or the
-licm-force-thread-model-single flag is specified, skip checks
related to thread-safety. This means that store promotion for
conditionally executed stores only requires proof of
dereferenceability and writability, but not of thread-safety. For
example, this enables promotion of stores to (non-constant) globals,
as well as captured allocas.

Fixes https://github.com/llvm/llvm-project/issues/50537.

Differential Revision: https://reviews.llvm.org/D130466

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/test/Transforms/LICM/promote-single-thread.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index e52381abd142c..a25061f06c148 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -372,6 +372,8 @@ class TargetTransformInfo {
 
   unsigned getAssumedAddrSpace(const Value *V) const;
 
+  bool isSingleThreaded() const;
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const;
 
@@ -1581,6 +1583,7 @@ class TargetTransformInfo::Concept {
   virtual bool
   canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
   virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
+  virtual bool isSingleThreaded() const = 0;
   virtual std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const = 0;
   virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
@@ -1959,6 +1962,8 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
     return Impl.getAssumedAddrSpace(V);
   }
 
+  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const override {
     return Impl.getPredicatedAddrSpace(V);

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 8342a82197ea8..36e363ad9cb73 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -108,6 +108,8 @@ class TargetTransformInfoImplBase {
 
   unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
 
+  bool isSingleThreaded() const { return false; }
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const {
     return std::make_pair(nullptr, -1);

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 6bdc620fc18f6..9fa5552466ce9 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -47,6 +47,7 @@
 #include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -287,6 +288,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
   }
 
+  bool isSingleThreaded() const {
+    return getTLI()->getTargetMachine().Options.ThreadModel ==
+           ThreadModel::Single;
+  }
+
   std::pair<const Value *, unsigned>
   getPredicatedAddrSpace(const Value *V) const {
     return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index afe70607d430e..efdd831cd585f 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -210,8 +210,9 @@ bool promoteLoopAccessesToScalars(
     const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
     SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
     PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
-    const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *,
-    OptimizationRemarkEmitter *, bool AllowSpeculation);
+    const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+    MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
+    bool AllowSpeculation);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.

diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 9dd8fddda27ce..0d3058512e05e 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -273,6 +273,10 @@ unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
   return TTIImpl->getAssumedAddrSpace(V);
 }
 
+bool TargetTransformInfo::isSingleThreaded() const {
+  return TTIImpl->isSingleThreaded();
+}
+
 std::pair<const Value *, unsigned>
 TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
   return TTIImpl->getPredicatedAddrSpace(V);

diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 5aa63ce5d2165..edd7017941af9 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -76,6 +76,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -112,6 +113,10 @@ static cl::opt<bool> ControlFlowHoisting(
     "licm-control-flow-hoisting", cl::Hidden, cl::init(false),
     cl::desc("Enable control flow (and PHI) hoisting in LICM"));
 
+static cl::opt<bool>
+    SingleThread("licm-force-thread-model-single", cl::Hidden, cl::init(false),
+                 cl::desc("Force thread model single in LICM pass"));
+
 static cl::opt<uint32_t> MaxNumUsesTraversed(
     "licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
     cl::desc("Max num uses visited for identifying load "
@@ -489,7 +494,8 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
              collectPromotionCandidates(MSSA, AA, L)) {
           LocalPromoted |= promoteLoopAccessesToScalars(
               PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
-              DT, AC, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
+              DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE,
+              LicmAllowSpeculation);
         }
         Promoted |= LocalPromoted;
       } while (LocalPromoted);
@@ -1911,17 +1917,21 @@ bool isWritableObject(const Value *Object) {
   if (auto *A = dyn_cast<Argument>(Object))
     return A->hasByValAttr();
 
+  if (auto *G = dyn_cast<GlobalVariable>(Object))
+    return !G->isConstant();
+
   // TODO: Noalias has nothing to do with writability, this should check for
   // an allocator function.
   return isNoAliasCall(Object);
 }
 
-bool isThreadLocalObject(const Value *Object, const Loop *L,
-                         DominatorTree *DT) {
+bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT,
+                         TargetTransformInfo *TTI) {
   // The object must be function-local to start with, and then not captured
   // before/in the loop.
-  return isIdentifiedFunctionLocal(Object) &&
-         isNotCapturedBeforeOrInLoop(Object, L, DT);
+  return (isIdentifiedFunctionLocal(Object) &&
+          isNotCapturedBeforeOrInLoop(Object, L, DT)) ||
+         (TTI->isSingleThreaded() || SingleThread);
 }
 
 } // namespace
@@ -1937,9 +1947,9 @@ bool llvm::promoteLoopAccessesToScalars(
     SmallVectorImpl<Instruction *> &InsertPts,
     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
     LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
-    const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU,
-    ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE,
-    bool AllowSpeculation) {
+    const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop,
+    MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+    OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
   // Verify inputs.
   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
          SafetyInfo != nullptr &&
@@ -2150,7 +2160,8 @@ bool llvm::promoteLoopAccessesToScalars(
   // violating the memory model.
   if (StoreSafety == StoreSafetyUnknown) {
     Value *Object = getUnderlyingObject(SomePtr);
-    if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT))
+    if (isWritableObject(Object) &&
+        isThreadLocalObject(Object, CurLoop, DT, TTI))
       StoreSafety = StoreSafe;
   }
 

diff --git a/llvm/test/Transforms/LICM/promote-single-thread.ll b/llvm/test/Transforms/LICM/promote-single-thread.ll
index b0127344a523a..0f32a4e751cea 100644
--- a/llvm/test/Transforms/LICM/promote-single-thread.ll
+++ b/llvm/test/Transforms/LICM/promote-single-thread.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefixes=CHECK,MT
+; RUN: opt -S -licm -licm-force-thread-model-single < %s | FileCheck %s --check-prefixes=CHECK,ST
 
 @g = external global i32
 @c = external constant i32
@@ -10,22 +11,40 @@ declare void @capture(ptr)
 ; mode only loads can be promoted, as a different thread might write to the
 ; global.
 define void @promote_global(i1 %c, i1 %c2) {
-; CHECK-LABEL: @promote_global(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
-; CHECK:       if:
-; CHECK-NEXT:    [[V_INC:%.*]] = add i32 [[V_INC2]], 1
-; CHECK-NEXT:    store i32 [[V_INC]], ptr @g, align 4
-; CHECK-NEXT:    br label [[LATCH]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
-; CHECK-NEXT:    br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
+; MT-LABEL: @promote_global(
+; MT-NEXT:  entry:
+; MT-NEXT:    [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
+; MT-NEXT:    br label [[LOOP:%.*]]
+; MT:       loop:
+; MT-NEXT:    [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
+; MT-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; MT:       if:
+; MT-NEXT:    [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; MT-NEXT:    store i32 [[V_INC]], ptr @g, align 4
+; MT-NEXT:    br label [[LATCH]]
+; MT:       latch:
+; MT-NEXT:    [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; MT-NEXT:    br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; MT:       exit:
+; MT-NEXT:    ret void
+;
+; ST-LABEL: @promote_global(
+; ST-NEXT:  entry:
+; ST-NEXT:    [[G_PROMOTED:%.*]] = load i32, ptr @g, align 4
+; ST-NEXT:    br label [[LOOP:%.*]]
+; ST:       loop:
+; ST-NEXT:    [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[G_PROMOTED]], [[ENTRY:%.*]] ]
+; ST-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; ST:       if:
+; ST-NEXT:    [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; ST-NEXT:    br label [[LATCH]]
+; ST:       latch:
+; ST-NEXT:    [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; ST-NEXT:    br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; ST:       exit:
+; ST-NEXT:    [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
+; ST-NEXT:    store i32 [[V_INC1_LCSSA]], ptr @g, align 4
+; ST-NEXT:    ret void
 ;
 entry:
   br label %loop
@@ -87,24 +106,44 @@ exit:
 ; mode only loads can be promoted, as a different thread might write to the
 ; captured alloca.
 define void @promote_captured_alloca(i1 %c, i1 %c2) {
-; CHECK-LABEL: @promote_captured_alloca(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @capture(ptr [[A]])
-; CHECK-NEXT:    [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    br label [[LOOP:%.*]]
-; CHECK:       loop:
-; CHECK-NEXT:    [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
-; CHECK:       if:
-; CHECK-NEXT:    [[V_INC:%.*]] = add i32 [[V_INC2]], 1
-; CHECK-NEXT:    store i32 [[V_INC]], ptr [[A]], align 4
-; CHECK-NEXT:    br label [[LATCH]]
-; CHECK:       latch:
-; CHECK-NEXT:    [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
-; CHECK-NEXT:    br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
+; MT-LABEL: @promote_captured_alloca(
+; MT-NEXT:  entry:
+; MT-NEXT:    [[A:%.*]] = alloca i32, align 4
+; MT-NEXT:    call void @capture(ptr [[A]])
+; MT-NEXT:    [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
+; MT-NEXT:    br label [[LOOP:%.*]]
+; MT:       loop:
+; MT-NEXT:    [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
+; MT-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; MT:       if:
+; MT-NEXT:    [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; MT-NEXT:    store i32 [[V_INC]], ptr [[A]], align 4
+; MT-NEXT:    br label [[LATCH]]
+; MT:       latch:
+; MT-NEXT:    [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; MT-NEXT:    br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; MT:       exit:
+; MT-NEXT:    ret void
+;
+; ST-LABEL: @promote_captured_alloca(
+; ST-NEXT:  entry:
+; ST-NEXT:    [[A:%.*]] = alloca i32, align 4
+; ST-NEXT:    call void @capture(ptr [[A]])
+; ST-NEXT:    [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 4
+; ST-NEXT:    br label [[LOOP:%.*]]
+; ST:       loop:
+; ST-NEXT:    [[V_INC2:%.*]] = phi i32 [ [[V_INC1:%.*]], [[LATCH:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ]
+; ST-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[LATCH]]
+; ST:       if:
+; ST-NEXT:    [[V_INC:%.*]] = add i32 [[V_INC2]], 1
+; ST-NEXT:    br label [[LATCH]]
+; ST:       latch:
+; ST-NEXT:    [[V_INC1]] = phi i32 [ [[V_INC]], [[IF]] ], [ [[V_INC2]], [[LOOP]] ]
+; ST-NEXT:    br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[LOOP]]
+; ST:       exit:
+; ST-NEXT:    [[V_INC1_LCSSA:%.*]] = phi i32 [ [[V_INC1]], [[LATCH]] ]
+; ST-NEXT:    store i32 [[V_INC1_LCSSA]], ptr [[A]], align 4
+; ST-NEXT:    ret void
 ;
 entry:
   %a = alloca i32


        


More information about the llvm-commits mailing list