[llvm] 0659000 - [LICM] Don't duplicate instructions just because they're free

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 28 05:31:30 PDT 2023


Author: Nikita Popov
Date: 2023-04-28T14:31:23+02:00
New Revision: 0659000ff79decc1173aac140d4b0325fe696c57

URL: https://github.com/llvm/llvm-project/commit/0659000ff79decc1173aac140d4b0325fe696c57
DIFF: https://github.com/llvm/llvm-project/commit/0659000ff79decc1173aac140d4b0325fe696c57.diff

LOG: [LICM] Don't duplicate instructions just because they're free

D37076 makes LICM duplicate instructions into exit blocks if the
instruction is free. For GEPs, the motivation appears to be that
this allows the GEP to be folded into addressing modes, while
non-foldable users outside the loop might prevent this. TBH I don't
think LICM is the place to do this (why doesn't CGP apply this
heuristic itself?) but at least I understand the motivation.

However, the transform is also applied to all other "free"
instructions, which are just that (removed during lowering and not
"folded" in some way). For such instructions, this transform seems
somewhere between useless, counter-productive (undoing CSE/GVN) and
actively incorrect. For example, this transform can duplicate freeze
instructions, which is illegal.

This patch limits the transform to just foldable GEPs, though we
might want to drop it from LICM entirely as a followup.

This is a small compile-time improvement, because querying TTI cost
model for every single instruction is expensive.

Differential Revision: https://reviews.llvm.org/D149136

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LICM.cpp
    llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
    llvm/test/Transforms/LICM/pr23608.ll
    llvm/test/Transforms/LICM/sink-foldable.ll
    llvm/test/Transforms/LICM/sinking.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index d0bb59ab7565d..9fc7210dd2a1f 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -149,10 +149,10 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
              "enable memory promotion."));
 
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
-                                  const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop,
-                                  bool LoopNestMode);
+static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
+                                      const LoopSafetyInfo *SafetyInfo,
+                                      TargetTransformInfo *TTI,
+                                      bool &FoldableInLoop, bool LoopNestMode);
 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
                   MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
@@ -582,14 +582,15 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
       // outside of the loop.  In this case, it doesn't even matter if the
       // operands of the instruction are loop invariant.
       //
-      bool FreeInLoop = false;
+      bool FoldableInLoop = false;
       bool LoopNestMode = OutermostLoop != nullptr;
       if (!I.mayHaveSideEffects() &&
-          isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
-                                SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
+          isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+                                    SafetyInfo, TTI, FoldableInLoop,
+                                    LoopNestMode) &&
           canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
         if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
-          if (!FreeInLoop) {
+          if (!FoldableInLoop) {
             ++II;
             salvageDebugInfo(I);
             eraseInstruction(I, *SafetyInfo, MSSAU);
@@ -1338,13 +1339,12 @@ static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
   return true;
 }
 
-/// Return true if the instruction is free in the loop.
-static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
+/// Return true if the instruction is foldable in the loop.
+static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
                          const TargetTransformInfo *TTI) {
-  InstructionCost CostI =
-      TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
-
   if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+    InstructionCost CostI =
+        TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
     if (CostI != TargetTransformInfo::TCC_Free)
       return false;
     // For a GEP, we cannot simply use getInstructionCost because currently
@@ -1361,7 +1361,7 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
     return true;
   }
 
-  return CostI == TargetTransformInfo::TCC_Free;
+  return false;
 }
 
 /// Return true if the only users of this instruction are outside of
@@ -1370,12 +1370,12 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
 ///
 /// We also return true if the instruction could be folded away in lowering.
 /// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
-                                  const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop,
-                                  bool LoopNestMode) {
+static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
+                                      const LoopSafetyInfo *SafetyInfo,
+                                      TargetTransformInfo *TTI,
+                                      bool &FoldableInLoop, bool LoopNestMode) {
   const auto &BlockColors = SafetyInfo->getBlockColors();
-  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
+  bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
   for (const User *U : I.users()) {
     const Instruction *UI = cast<Instruction>(U);
     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
@@ -1402,8 +1402,8 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
     }
 
     if (CurLoop->contains(UI)) {
-      if (IsFree) {
-        FreeInLoop = true;
+      if (IsFoldable) {
+        FoldableInLoop = true;
         continue;
       }
       return false;

diff  --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
index 1cf70192d6dc7..adbb956ba32ad 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
@@ -126,17 +126,17 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    li 6, 0
-; CHECK-NEXT:    lwz 7, 0(3)
+; CHECK-NEXT:    lwz 5, 0(3)
 ; CHECK-NEXT:    b .LBB2_2
 ; CHECK-NEXT:  .LBB2_1: # %atomicrmw.start
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    cmplw 5, 7
-; CHECK-NEXT:    mr 7, 5
 ; CHECK-NEXT:    beq 0, .LBB2_7
 ; CHECK-NEXT:  .LBB2_2: # %atomicrmw.start
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB2_5 Depth 2
-; CHECK-NEXT:    addi 5, 7, 1
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    addi 5, 5, 1
 ; CHECK-NEXT:    cmplw 7, 4
 ; CHECK-NEXT:    bc 12, 0, .LBB2_4
 ; CHECK-NEXT:  # %bb.3: # %atomicrmw.start
@@ -169,18 +169,18 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    ld 7, 0(3)
+; CHECK-NEXT:    ld 5, 0(3)
 ; CHECK-NEXT:    li 6, 0
 ; CHECK-NEXT:    b .LBB3_2
 ; CHECK-NEXT:  .LBB3_1: # %atomicrmw.start
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    cmpld 5, 7
-; CHECK-NEXT:    mr 7, 5
 ; CHECK-NEXT:    beq 0, .LBB3_7
 ; CHECK-NEXT:  .LBB3_2: # %atomicrmw.start
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB3_5 Depth 2
-; CHECK-NEXT:    addi 5, 7, 1
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:    addi 5, 5, 1
 ; CHECK-NEXT:    cmpld 7, 4
 ; CHECK-NEXT:    bc 12, 0, .LBB3_4
 ; CHECK-NEXT:  # %bb.3: # %atomicrmw.start
@@ -334,19 +334,19 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK-LABEL: atomicrmw_udec_wrap_i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    lwz 6, 0(3)
+; CHECK-NEXT:    lwz 5, 0(3)
 ; CHECK-NEXT:    b .LBB6_2
 ; CHECK-NEXT:  .LBB6_1: # %atomicrmw.start
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    cmplw 5, 6
-; CHECK-NEXT:    mr 6, 5
 ; CHECK-NEXT:    beq 0, .LBB6_7
 ; CHECK-NEXT:  .LBB6_2: # %atomicrmw.start
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB6_5 Depth 2
+; CHECK-NEXT:    mr 6, 5
 ; CHECK-NEXT:    cmpwi 6, 0
 ; CHECK-NEXT:    cmplw 1, 6, 4
-; CHECK-NEXT:    addi 5, 6, -1
+; CHECK-NEXT:    addi 5, 5, -1
 ; CHECK-NEXT:    cror 20, 2, 5
 ; CHECK-NEXT:    bc 12, 20, .LBB6_4
 ; CHECK-NEXT:  # %bb.3: # %atomicrmw.start
@@ -379,19 +379,18 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK-LABEL: atomicrmw_udec_wrap_i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    ld 6, 0(3)
+; CHECK-NEXT:    ld 5, 0(3)
 ; CHECK-NEXT:    b .LBB7_2
 ; CHECK-NEXT:  .LBB7_1: # %atomicrmw.start
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    cmpld 5, 6
-; CHECK-NEXT:    mr 6, 5
 ; CHECK-NEXT:    beq 0, .LBB7_7
 ; CHECK-NEXT:  .LBB7_2: # %atomicrmw.start
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB7_5 Depth 2
-; CHECK-NEXT:    cmpdi 6, 0
+; CHECK-NEXT:    mr. 6, 5
 ; CHECK-NEXT:    cmpld 1, 6, 4
-; CHECK-NEXT:    addi 5, 6, -1
+; CHECK-NEXT:    addi 5, 5, -1
 ; CHECK-NEXT:    cror 20, 2, 5
 ; CHECK-NEXT:    bc 12, 20, .LBB7_4
 ; CHECK-NEXT:  # %bb.3: # %atomicrmw.start

diff  --git a/llvm/test/Transforms/LICM/pr23608.ll b/llvm/test/Transforms/LICM/pr23608.ll
index 2477aa845ce93..4390e02ea05d8 100644
--- a/llvm/test/Transforms/LICM/pr23608.ll
+++ b/llvm/test/Transforms/LICM/pr23608.ll
@@ -25,9 +25,8 @@ define void @fn1() {
 ; NO_ASSUME-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0
 ; NO_ASSUME-NEXT:    br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]]
 ; NO_ASSUME:       bb13:
-; NO_ASSUME-NEXT:    [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ]
-; NO_ASSUME-NEXT:    [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64
-; NO_ASSUME-NEXT:    [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr
+; NO_ASSUME-NEXT:    [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ]
+; NO_ASSUME-NEXT:    [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr
 ; NO_ASSUME-NEXT:    call void @__msan_warning_noreturn()
 ; NO_ASSUME-NEXT:    unreachable
 ; NO_ASSUME:       bb15:
@@ -54,9 +53,8 @@ define void @fn1() {
 ; USE_ASSUME-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[TMP4]], 0
 ; USE_ASSUME-NEXT:    br i1 [[TOBOOL]], label [[BB13:%.*]], label [[BB15:%.*]]
 ; USE_ASSUME:       bb13:
-; USE_ASSUME-NEXT:    [[F_IBLOCK_LCSSA:%.*]] = phi ptr [ [[TMP]], [[BB2]] ]
-; USE_ASSUME-NEXT:    [[TMP4_LE:%.*]] = ptrtoint ptr [[F_IBLOCK_LCSSA]] to i64
-; USE_ASSUME-NEXT:    [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LE]] to ptr
+; USE_ASSUME-NEXT:    [[TMP4_LCSSA:%.*]] = phi i64 [ [[TMP4]], [[BB2]] ]
+; USE_ASSUME-NEXT:    [[TMP8_LE:%.*]] = inttoptr i64 [[TMP4_LCSSA]] to ptr
 ; USE_ASSUME-NEXT:    call void @__msan_warning_noreturn()
 ; USE_ASSUME-NEXT:    unreachable
 ; USE_ASSUME:       bb15:

diff  --git a/llvm/test/Transforms/LICM/sink-foldable.ll b/llvm/test/Transforms/LICM/sink-foldable.ll
index 5cc15e4f6dc7a..bf2cc77e6f47b 100644
--- a/llvm/test/Transforms/LICM/sink-foldable.ll
+++ b/llvm/test/Transforms/LICM/sink-foldable.ll
@@ -188,11 +188,11 @@ define ptr @test3(i64 %j, ptr readonly %P, ptr readnone %Q) {
 ; CHECK-NEXT:    [[P1:%.*]] = phi ptr [ [[ARRAYIDX0]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       loopexit1:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[IF_END]] ]
+; CHECK-NEXT:    [[TRUNC_LCSSA1:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ]
 ; CHECK-NEXT:    [[P_ADDR_LCSSA:%.*]] = phi ptr [ [[P_ADDR]], [[IF_END]] ]
-; CHECK-NEXT:    [[TRUNC_LE:%.*]] = trunc i64 [[ADD_LCSSA]] to i32
-; CHECK-NEXT:    [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LE]]
-; CHECK-NEXT:    call void @dummy(i32 [[TRUNC_LE]])
+; CHECK-NEXT:    [[TRUNC_LCSSA:%.*]] = phi i32 [ [[TRUNC]], [[IF_END]] ]
+; CHECK-NEXT:    [[ARRAYIDX1_LE:%.*]] = getelementptr inbounds ptr, ptr [[P_ADDR_LCSSA]], i32 [[TRUNC_LCSSA1]]
+; CHECK-NEXT:    call void @dummy(i32 [[TRUNC_LCSSA]])
 ; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       return:
 ; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi ptr [ [[P1]], [[LOOPEXIT0]] ], [ [[ARRAYIDX1_LE]], [[LOOPEXIT1]] ], [ null, [[ENTRY:%.*]] ]

diff  --git a/llvm/test/Transforms/LICM/sinking.ll b/llvm/test/Transforms/LICM/sinking.ll
index 01eb3b72b973d..5888f2dcf65b0 100644
--- a/llvm/test/Transforms/LICM/sinking.ll
+++ b/llvm/test/Transforms/LICM/sinking.ll
@@ -1001,6 +1001,7 @@ out:
 declare void @use.i32(i32)
 declare void @use.i64(i64)
 
+; Don't duplicate freeze just because it's free.
 define i32 @duplicate_freeze(i1 %c, i32 %x) {
 ; CHECK-LABEL: @duplicate_freeze(
 ; CHECK-NEXT:  entry:
@@ -1010,8 +1011,8 @@ define i32 @duplicate_freeze(i1 %c, i32 %x) {
 ; CHECK-NEXT:    call void @use.i32(i32 [[FR]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[FR_LE:%.*]] = freeze i32 [[X]]
-; CHECK-NEXT:    ret i32 [[FR_LE]]
+; CHECK-NEXT:    [[FR_LCSSA:%.*]] = phi i32 [ [[FR]], [[LOOP]] ]
+; CHECK-NEXT:    ret i32 [[FR_LCSSA]]
 ;
 entry:
   br label %loop
@@ -1025,6 +1026,7 @@ exit:
   ret i32 %fr
 }
 
+; Don't duplicate ptrtoint just because it's free.
 define i64 @duplicate_ptrtoint(i1 %c, ptr %p) {
 ; CHECK-LABEL: @duplicate_ptrtoint(
 ; CHECK-NEXT:  entry:
@@ -1034,8 +1036,8 @@ define i64 @duplicate_ptrtoint(i1 %c, ptr %p) {
 ; CHECK-NEXT:    call void @use.i64(i64 [[PI]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[PI_LE:%.*]] = ptrtoint ptr [[P]] to i64
-; CHECK-NEXT:    ret i64 [[PI_LE]]
+; CHECK-NEXT:    [[PI_LCSSA:%.*]] = phi i64 [ [[PI]], [[LOOP]] ]
+; CHECK-NEXT:    ret i64 [[PI_LCSSA]]
 ;
 entry:
   br label %loop


        


More information about the llvm-commits mailing list