[llvm] 639d912 - [LICM] Allow load-only scalar promotion in the presence of unwinding

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 2 00:27:28 PDT 2022


Author: Nikita Popov
Date: 2022-09-02T09:27:13+02:00
New Revision: 639d9122825d2f697d2dadc396c2f872f2a97921

URL: https://github.com/llvm/llvm-project/commit/639d9122825d2f697d2dadc396c2f872f2a97921
DIFF: https://github.com/llvm/llvm-project/commit/639d9122825d2f697d2dadc396c2f872f2a97921.diff

LOG: [LICM] Allow load-only scalar promotion in the presence of unwinding

Currently, we bail out of scalar promotion if the loop may unwind
and the memory may be visible on unwind. This is because we can't
insert stores of the promoted value on unwind edges.

However, scalar promotion nowadays also supports promoting only the
loads while leaving the stores in place. This kind of promotion is
safe even in the presence of unwinding.
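
To illustrate with a small, hypothetical function (it is not one of the
tests updated below): %a is caller-visible memory, so if @may_throw
unwinds, the caller may still observe %a and the store cannot be moved
out of the loop; the load, however, can now be promoted to a single
preheader load plus a phi carrying the most recently stored value.

declare void @may_throw()

define void @sketch(i32* noalias nocapture %a, i32 %n) uwtable {
entry:
  br label %loop

loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  ; Load-only promotion rewrites this load into a phi of the preheader
  ; load and the value stored on the previous iteration.
  %v = load i32, i32* %a, align 4
  %sum = add i32 %v, %i
  ; The store stays in the loop; sinking it would lose updates that the
  ; caller could observe if @may_throw unwinds.
  store i32 %sum, i32* %a, align 4
  call void @may_throw()
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i.next, %n
  br i1 %cmp, label %loop, label %exit

exit:
  ret void
}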

Differential Revision: https://reviews.llvm.org/D133111

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/test/Transforms/LICM/guards.ll
    llvm/test/Transforms/LICM/scalar-promote-unwind.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 1fd66be06b5a7..9d98beaa19e66 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1957,9 +1957,14 @@ bool llvm::promoteLoopAccessesToScalars(
   // store is never executed, but the exit blocks are not executed either.
 
   bool DereferenceableInPH = false;
-  bool SafeToInsertStore = false;
   bool StoreIsGuanteedToExecute = false;
   bool FoundLoadToPromote = false;
+  // Goes from Unknown to either Safe or Unsafe, but can't switch between them.
+  enum {
+    StoreSafe,
+    StoreUnsafe,
+    StoreSafetyUnknown,
+  } StoreSafety = StoreSafetyUnknown;
 
   SmallVector<Instruction *, 64> LoopUses;
 
@@ -1981,7 +1986,7 @@ bool llvm::promoteLoopAccessesToScalars(
     // after return and thus can't possibly load from the object.
     Value *Object = getUnderlyingObject(SomePtr);
     if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
-      return false;
+      StoreSafety = StoreUnsafe;
   }
 
   // Check that all accesses to pointers in the alias set use the same type.
@@ -2040,7 +2045,8 @@ bool llvm::promoteLoopAccessesToScalars(
         StoreIsGuanteedToExecute |= GuaranteedToExecute;
         if (GuaranteedToExecute) {
           DereferenceableInPH = true;
-          SafeToInsertStore = true;
+          if (StoreSafety == StoreSafetyUnknown)
+            StoreSafety = StoreSafe;
           Alignment = std::max(Alignment, InstAlignment);
         }
 
@@ -2050,10 +2056,11 @@ bool llvm::promoteLoopAccessesToScalars(
         // introducing stores on paths that did not have them.
         // Note that this only looks at explicit exit blocks. If we ever
         // start sinking stores into unwind edges (see above), this will break.
-        if (!SafeToInsertStore)
-          SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
-            return DT->dominates(Store->getParent(), Exit);
-          });
+        if (StoreSafety == StoreSafetyUnknown &&
+            llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) {
+              return DT->dominates(Store->getParent(), Exit);
+            }))
+          StoreSafety = StoreSafe;
 
         // If the store is not guaranteed to execute, we may still get
         // deref info through it.
@@ -2105,22 +2112,22 @@ bool llvm::promoteLoopAccessesToScalars(
   // Check whether the location is thread-local. If it is, then we can insert
   // stores along paths which originally didn't have them without violating the
   // memory model.
-  if (!SafeToInsertStore) {
+  if (StoreSafety == StoreSafetyUnknown) {
     Value *Object = getUnderlyingObject(SomePtr);
-    SafeToInsertStore =
-        (isNoAliasCall(Object) || isa<AllocaInst>(Object) ||
+    if ((isNoAliasCall(Object) || isa<AllocaInst>(Object) ||
          (isa<Argument>(Object) && cast<Argument>(Object)->hasByValAttr())) &&
-        isNotCapturedBeforeOrInLoop(Object, CurLoop, DT);
+        isNotCapturedBeforeOrInLoop(Object, CurLoop, DT))
+      StoreSafety = StoreSafe;
   }
 
   // If we've still failed to prove we can sink the store, hoist the load
   // only, if possible.
-  if (!SafeToInsertStore && !FoundLoadToPromote)
+  if (StoreSafety != StoreSafe && !FoundLoadToPromote)
     // If we cannot hoist the load either, give up.
     return false;
 
   // Lets do the promotion!
-  if (SafeToInsertStore)
+  if (StoreSafety == StoreSafe)
     LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
                       << '\n');
   else
@@ -2146,7 +2153,7 @@ bool llvm::promoteLoopAccessesToScalars(
   LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
                         InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
                         Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
-                        SafeToInsertStore);
+                        StoreSafety == StoreSafe);
 
   // Set up the preheader to have a definition of the value.  It is the live-out
   // value from the preheader that uses in the loop will use.

diff --git a/llvm/test/Transforms/LICM/guards.ll b/llvm/test/Transforms/LICM/guards.ll
index 919f4151ea83f..7feb1291483d3 100644
--- a/llvm/test/Transforms/LICM/guards.ll
+++ b/llvm/test/Transforms/LICM/guards.ll
@@ -109,17 +109,18 @@ loop:
   br label %loop
 }
 
-; Hoist guard. Cannot hoist load because of aliasing.
+; Hoist guard. Cannot hoist load because of aliasing, but can promote.
 define void @test3(i1 %cond, i32* %ptr) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[COND:%.*]]) [ "deopt"(i32 0) ]
+; CHECK-NEXT:    [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[X:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[X_INC:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[VAL:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[ENTRY:%.*]] ], [ 0, [[LOOP]] ]
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[X_INC:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    store i32 0, i32* [[PTR]], align 4
-; CHECK-NEXT:    [[X_INC]] = add i32 [[X]], [[VAL]]
+; CHECK-NEXT:    [[X_INC]] = add i32 [[X]], [[TMP0]]
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 

diff --git a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
index 506b5baaa3f72..4b57e4029fbf3 100644
--- a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
@@ -10,11 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[ADD1]], 1
 ; CHECK-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
 ; CHECK-NEXT:    br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
@@ -150,11 +151,12 @@ for.cond.cleanup:
 define void @test_sret(i32* noalias sret(i32) %a, i1 zeroext %y) uwtable {
 ; CHECK-LABEL: @test_sret(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[ADD1]], 1
 ; CHECK-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
 ; CHECK-NEXT:    br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
@@ -466,17 +468,18 @@ fun.ret:
   ret void
 }
 
-; The malloc'ed memory can be captured and therefore not promoted.
+; The malloc'ed memory can be captured and therefore only loads can be promoted.
 define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
 ; CHECK-LABEL: @malloc_capture(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i8* @malloc(i64 4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32*
+; CHECK-NEXT:    [[DOTPROMOTED:%.*]] = load i32, i32* [[TMP0]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_LATCH:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[ADD1:%.*]] = phi i32 [ [[DOTPROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_LATCH:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_LATCH]] ]
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[ADD1]], 1
 ; CHECK-NEXT:    store i32 [[ADD]], i32* [[TMP0]], align 4
 ; CHECK-NEXT:    br label [[FOR_CALL:%.*]]
 ; CHECK:       for.call:
@@ -492,15 +495,15 @@ define void @malloc_capture(i32** noalias %A) personality i8* bitcast (i32 (...)
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[FUN_RET:%.*]]
 ; CHECK:       lpad:
-; CHECK-NEXT:    [[TMP2:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
 ; CHECK-NEXT:    catch i8* null
-; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i8*, i32 } [[TMP2]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1
 ; CHECK-NEXT:    br label [[CATCH:%.*]]
 ; CHECK:       catch:
-; CHECK-NEXT:    [[TMP5:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP3]])
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP0]] to i8*
-; CHECK-NEXT:    call void @free(i8* [[TMP6]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i8* @__cxa_begin_catch(i8* [[TMP2]])
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP0]] to i8*
+; CHECK-NEXT:    call void @free(i8* [[TMP5]])
 ; CHECK-NEXT:    call void @__cxa_end_catch()
 ; CHECK-NEXT:    br label [[FUN_RET]]
 ; CHECK:       fun.ret:

