[llvm] r315438 - [LICM] Disallow sinking of unordered atomic loads into loops

Wed Oct 11 00:26:45 PDT 2017

Author: mkazantsev
Date: Wed Oct 11 00:26:45 2017
New Revision: 315438

URL: http://llvm.org/viewvc/llvm-project?rev=315438&view=rev
Log:
[LICM] Disallow sinking of unordered atomic loads into loops

Sinking of unordered atomic load into loop must be disallowed because it turns
a single load into multiple loads. The relevant section of the documentation
is: http://llvm.org/docs/Atomics.html#unordered, specifically the Notes for
Optimizers section. Here is the full text of this section:

> Notes for optimizers
> In terms of the optimizer, this **prohibits any transformation that
> transforms a single load into multiple loads**, transforms a store into
> multiple stores, narrows a store, or stores a value which would not be
> stored otherwise. Some examples of unsafe optimizations are narrowing
> an assignment into a bitfield, rematerializing a load, and turning loads
> and stores into a memcpy call. Reordering unordered operations is safe,
> though, and optimizers should take advantage of that because unordered
> operations are common in languages that need them.

Patch by Daniil Suchkov!

Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D38392

Modified:
    llvm/trunk/lib/Transforms/Scalar/LICM.cpp
    llvm/trunk/test/Transforms/LICM/loopsink.ll

Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=315438&r1=315437&r2=315438&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Wed Oct 11 00:26:45 2017
@@ -577,10 +577,13 @@ bool llvm::canSinkOrHoistInst(Instructio
                               Loop *CurLoop, AliasSetTracker *CurAST,
                               LoopSafetyInfo *SafetyInfo,
                               OptimizationRemarkEmitter *ORE) {
+  // SafetyInfo is nullptr if we are checking for sinking from preheader to
+  // loop body.
+  const bool SinkingToLoopBody = !SafetyInfo;
   // Loads have extra constraints we have to verify before we can hoist them.
   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
     if (!LI->isUnordered())
-      return false; // Don't hoist volatile/atomic loads!
+      return false; // Don't sink/hoist volatile or ordered atomic loads!
 
     // Loads from constant memory are always safe to move, even if they end up
     // in the same alias set as something that ends up being modified.
@@ -589,6 +592,9 @@ bool llvm::canSinkOrHoistInst(Instructio
     if (LI->getMetadata(LLVMContext::MD_invariant_load))
       return true;
 
+    if (LI->isAtomic() && SinkingToLoopBody)
+      return false; // Don't sink unordered atomic loads to loop body.
+
     // This checks for an invariant.start dominating the load.
     if (isLoadInvariantInLoop(LI, DT, CurLoop))
       return true;
@@ -664,9 +670,9 @@ bool llvm::canSinkOrHoistInst(Instructio
       !isa<InsertValueInst>(I))
     return false;
 
-  // SafetyInfo is nullptr if we are checking for sinking from preheader to
-  // loop body. It will be always safe as there is no speculative execution.
-  if (!SafetyInfo)
+  // If we are checking for sinking from preheader to loop body it will be
+  // always safe as there is no speculative execution.
+  if (SinkingToLoopBody)
     return true;
 
   // TODO: Plumb the context instruction through to make hoisting and sinking

Modified: llvm/trunk/test/Transforms/LICM/loopsink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/loopsink.ll?rev=315438&r1=315437&r2=315438&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LICM/loopsink.ll (original)
+++ llvm/trunk/test/Transforms/LICM/loopsink.ll Wed Oct 11 00:26:45 2017
@@ -1,5 +1,5 @@
 ; RUN: opt -S -loop-sink < %s | FileCheck %s
-; RUN: opt -S -passes=loop-sink < %s | FileCheck %s
+; RUN: opt -S -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
 
 @g = global i32 0, align 4
 
@@ -278,6 +278,99 @@ define i32 @t5(i32, i32*) #0 !prof !0 {
 
 .exit:
   ret i32 10
+}
+
+;     b1
+;    /  \
+;   b2  b3
+;    \  /
+;     b4
+; preheader: 1000
+; b2: 15
+; b3: 7
+; Do not sink unordered atomic load to b2
+; CHECK: t6
+; CHECK: .preheader:
+; CHECK:  load atomic i32, i32* @g unordered, align 4
+; CHECK: .b2:
+; CHECK-NOT: load atomic i32, i32* @g unordered, align 4
+define i32 @t6(i32, i32) #0 !prof !0 {
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %.exit, label %.preheader
+
+.preheader:
+  %invariant = load atomic i32, i32* @g unordered, align 4
+  br label %.b1
+
+.b1:
+  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
+  %c1 = icmp sgt i32 %iv, %0
+  br i1 %c1, label %.b2, label %.b3, !prof !1
+
+.b2:
+  %t1 = add nsw i32 %invariant, %iv
+  br label %.b4
+
+.b3:
+  %t2 = add nsw i32 %iv, 100
+  br label %.b4
+
+.b4:
+  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
+  %t3 = add nuw nsw i32 %iv, 1
+  %c2 = icmp eq i32 %t3, %p1
+  br i1 %c2, label %.b1, label %.exit, !prof !3
+
+.exit:
+  ret i32 10
+}
+
+ at g_const = constant i32 0, align 4
+
+;     b1
+;    /  \
+;   b2  b3
+;    \  /
+;     b4
+; preheader: 1000
+; b2: 0.5
+; b3: 999.5
+; Sink unordered atomic load to b2. It is allowed to sink into loop unordered
+; load from constant.
+; CHECK: t7
+; CHECK: .preheader:
+; CHECK-NOT:  load atomic i32, i32* @g_const unordered, align 4
+; CHECK: .b2:
+; CHECK: load atomic i32, i32* @g_const unordered, align 4
+define i32 @t7(i32, i32) #0 !prof !0 {
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %.exit, label %.preheader
+
+.preheader:
+  %invariant = load atomic i32, i32* @g_const unordered, align 4
+  br label %.b1
+
+.b1:
+  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
+  %c1 = icmp sgt i32 %iv, %0
+  br i1 %c1, label %.b2, label %.b3, !prof !1
+
+.b2:
+  %t1 = add nsw i32 %invariant, %iv
+  br label %.b4
+
+.b3:
+  %t2 = add nsw i32 %iv, 100
+  br label %.b4
+
+.b4:
+  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
+  %t3 = add nuw nsw i32 %iv, 1
+  %c2 = icmp eq i32 %t3, %p1
+  br i1 %c2, label %.b1, label %.exit, !prof !3
+
+.exit:
+  ret i32 10
 }
 
 declare i32 @foo()