[llvm] 2d9b6a0 - [GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable fixed-point iteration (#94291)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 08:51:13 PDT 2024
Author: Tobias Stadler
Date: 2024-06-14T17:51:09+02:00
New Revision: 2d9b6a01c7a77ee76a5c279901bca1659a550499
URL: https://github.com/llvm/llvm-project/commit/2d9b6a01c7a77ee76a5c279901bca1659a550499
DIFF: https://github.com/llvm/llvm-project/commit/2d9b6a01c7a77ee76a5c279901bca1659a550499.diff
LOG: [GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable fixed-point iteration (#94291)
This adds an option to CombinerInfo to limit the number of iterations the
Combiner performs. The option is then used to disable fixed-point iteration for
the AArch64O0PreLegalizerCombiner. The combines there are simple enough that
the code-quality impact should be minimal with the current heuristics
(instructions are processed from top to bottom of each basic block, and
new/changed instructions are added back to the worklist). Test changes
are due to some instructions not being DCE'd, which has no actual impact
because InstructionSelect performs DCE as well.
AArch64 CTMark O0:
-0.9% geomean compile-time (instruction count)
no change in size..text for any of the benchmarks
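
To make the new knob concrete, here is a minimal sketch of how a combiner pass
opts out of fixed-point iteration. The helper function name is illustrative
only; the CombinerInfo constructor arguments and the MaxIterations field mirror
the AArch64O0PreLegalizerCombiner change further down in this diff.

    // Sketch, not a complete pass: shows how a target-specific combiner
    // configures CombinerInfo to run a single pass instead of iterating
    // to a fixed point.
    #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
    #include "llvm/IR/Function.h"

    static llvm::CombinerInfo buildO0CombinerInfo(const llvm::Function &F) {
      llvm::CombinerInfo CInfo(/*AllowIllegalOps*/ true,
                               /*ShouldLegalizeIllegal*/ false,
                               /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
                               F.hasOptSize(), F.hasMinSize());
      // MaxIterations == 0 (the default) keeps the old behaviour and iterates
      // until a fixed point is reached; MaxIterations == 1 runs a single
      // top-to-bottom pass, trading possibly missed combines for compile time.
      CInfo.MaxIterations = 1;
      return CInfo;
    }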
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
llvm/lib/CodeGen/GlobalISel/Combiner.cpp
llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 13a8faf955a7e..2b0eb71f88082 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -49,6 +49,10 @@ struct CombinerInfo {
bool EnableOptSize;
/// Whether we're optimizing for minsize (-Oz).
bool EnableMinSize;
+
+ /// The maximum number of times the Combiner will iterate over the
+ /// MachineFunction. Setting this to 0 enables fixed-point iteration.
+ unsigned MaxIterations = 0;
};
} // namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index d18e65a83484f..3310ce5455c97 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -27,6 +28,11 @@
using namespace llvm;
+STATISTIC(NumOneIteration, "Number of functions with one iteration");
+STATISTIC(NumTwoIterations, "Number of functions with two iterations");
+STATISTIC(NumThreeOrMoreIterations,
+ "Number of functions with three or more iterations");
+
namespace llvm {
cl::OptionCategory GICombinerOptionCategory(
"GlobalISel Combiner",
@@ -135,7 +141,11 @@ bool Combiner::combineMachineInstrs() {
bool MFChanged = false;
bool Changed;
- do {
+ unsigned Iteration = 0;
+ while (true) {
+ ++Iteration;
+ LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
+
WorkList.clear();
// Collect all instructions. Do a post order traversal for basic blocks and
@@ -166,7 +176,28 @@ bool Combiner::combineMachineInstrs() {
WLObserver->reportFullyCreatedInstrs();
}
MFChanged |= Changed;
- } while (Changed);
+
+ if (!Changed) {
+ LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #"
+ << Iteration << '\n');
+ break;
+ }
+ // Iterate until a fixed-point is reached if MaxIterations == 0,
+ // otherwise limit the number of iterations.
+ if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) {
+ LLVM_DEBUG(
+ dbgs() << "\nCombiner reached iteration limit after iteration #"
+ << Iteration << '\n');
+ break;
+ }
+ }
+
+ if (Iteration == 1)
+ ++NumOneIteration;
+ else if (Iteration == 2)
+ ++NumTwoIterations;
+ else
+ ++NumThreeOrMoreIterations;
#ifndef NDEBUG
if (CSEInfo) {
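
Reading the hunks above together, the resulting loop in
Combiner::combineMachineInstrs behaves roughly as follows (a condensed sketch
reconstructed from this diff; the worklist refill, observers, and debug output
are elided):

    unsigned Iteration = 0;
    while (true) {
      ++Iteration;
      // ... refill WorkList and try to combine every instruction ...
      MFChanged |= Changed;
      if (!Changed)
        break; // fixed point reached
      // MaxIterations == 0 means "iterate until a fixed point"; any other
      // value caps the number of passes over the MachineFunction.
      if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations)
        break; // iteration limit reached
    }
    if (Iteration == 1)
      ++NumOneIteration;
    else if (Iteration == 2)
      ++NumTwoIterations;
    else
      ++NumThreeOrMoreIterations;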
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 17dd8f2314a2b..0ba3a543d114a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -165,6 +165,10 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
F.hasOptSize(), F.hasMinSize());
+ // Disable fixed-point iteration in the Combiner. This improves compile-time
+ // at the cost of possibly missing optimizations. See PR#94291 for details.
+ CInfo.MaxIterations = 1;
+
AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
/*CSEInfo*/ nullptr, RuleConfig, ST);
return Impl.combineMachineInstrs();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
index 5ab086ffd2c13..c4e07de265edd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
@@ -28,6 +28,7 @@ define i32 @foo() {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -35,19 +36,19 @@ define i32 @foo() {
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV3]](p0) :: (store (s32) into @var2)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
- ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+ ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
- ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3)
- ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+ ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV5]](p0) :: (store (s32) into @var3)
+ ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C6]](s32)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[C7]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
@@ -84,6 +85,7 @@ define i32 @darwin_tls() {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -96,8 +98,8 @@ define i32 @darwin_tls() {
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[C3]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
@@ -127,6 +129,7 @@ define i32 @imm_cost_too_large_cost_of_2() {
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -147,8 +150,8 @@ define i32 @imm_cost_too_large_cost_of_2() {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C3]](s32)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[C4]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
@@ -183,6 +186,7 @@ define i64 @imm_cost_too_large_cost_of_4() {
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -203,8 +207,8 @@ define i64 @imm_cost_too_large_cost_of_4() {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: $x0 = COPY [[C3]](s64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C4]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
@@ -239,6 +243,7 @@ define i64 @f64_imm_cost_too_high(double %a) {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -259,8 +264,8 @@ define i64 @f64_imm_cost_too_high(double %a) {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: $x0 = COPY [[C3]](s64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C4]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
@@ -294,6 +299,7 @@ define i64 @f64_imm_cheap(double %a) {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -301,24 +307,24 @@ define i64 @f64_imm_cheap(double %a) {
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
- ; CHECK-NEXT: G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+ ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.then2:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
- ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
+ ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
; CHECK-NEXT: G_BR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
- ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: $x0 = COPY [[C6]](s64)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+ ; CHECK-NEXT: G_STORE [[C6]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C7]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4