[PATCH] D143631: [LTO] Don't let InstCombine re-sink the vastly more expensive fdiv

David Sherwood via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 29 06:50:00 PDT 2023


david-arm updated this revision to Diff 535766.
david-arm edited the summary of this revision.
david-arm added a comment.

- Rebase


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143631/new/

https://reviews.llvm.org/D143631

Files:
  llvm/lib/Passes/PassBuilderPipelines.cpp
  llvm/test/Other/new-pm-lto-defaults.ll
  llvm/test/Transforms/PhaseOrdering/lto-licm.ll


Index: llvm/test/Transforms/PhaseOrdering/lto-licm.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/lto-licm.ll
+++ llvm/test/Transforms/PhaseOrdering/lto-licm.ll
@@ -4,6 +4,7 @@
 define void @hoist_fdiv(ptr %a, float %b) {
 ; CHECK-LABEL: @hoist_fdiv(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv fast float 1.000000e+00, [[B:%.*]]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
@@ -12,9 +13,9 @@
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast float [[TMP0]], [[B:%.*]]
-; CHECK-NEXT:    store float [[TMP1]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[TMP0]]
+; CHECK-NEXT:    store float [[TMP2]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
Index: llvm/test/Other/new-pm-lto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-lto-defaults.ll
+++ llvm/test/Other/new-pm-lto-defaults.ll
@@ -131,6 +131,9 @@
 ; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass on foo
 ; CHECK-O23SZ-NEXT: Running pass: AlignmentFromAssumptionsPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo
+; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass
+; CHECK-O23SZ-NEXT: Running pass: LCSSAPass
+; CHECK-O23SZ-NEXT: Running pass: LICMPass
 ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass on foo
 ; CHECK-O23SZ-NEXT: Running pass: LowerTypeTestsPass
Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1254,8 +1254,19 @@
   // alignment information, try to re-derive it here.
   FPM.addPass(AlignmentFromAssumptionsPass());
 
-  if (IsFullLTO)
+  if (IsFullLTO) {
     FPM.addPass(InstCombinePass());
+
+    // This is needed to work around problems that instcombine introduces, such
+    // as sinking expensive FP divides into loops containing multiplications
+    // using the divide result.
+    // For normal compilation without LTO we never run the InstCombine pass
+    // after LICM so we avoid this problem.
+    FPM.addPass(createFunctionToLoopPassAdaptor(
+        LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                 /*AllowSpeculation=*/true),
+        /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+  }
 }
 
 ModulePassManager


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D143631.535766.patch
Type: text/x-patch
Size: 3024 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230629/897d9f61/attachment.bin>


More information about the llvm-commits mailing list