[llvm] [GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable fixed-point iteration (PR #94291)

Thu Jun 6 13:07:10 PDT 2024

https://github.com/tobias-stadler updated https://github.com/llvm/llvm-project/pull/94291

>From 3cdefa8af1199da369104744c0429667deb4cb3f Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Tue, 4 Jun 2024 00:09:11 +0200
Subject: [PATCH 1/3] [GlobalISel][AArch64] AArch64O0PreLegalizerCombiner:
 Disable fixed-point iteration

This adds an option to CombinerInfo to turn off the fixed-point
iteration in the Combiner. This option is then used for
AArch64O0PreLegalizerCombiner. The combines there are simple enough that
code quality impact should be minimal with the current heuristics
(instructions are processed from top to bottom of the basic block,
new/changed instructions are added back to the worklist).
Test changes are due to some instructions not being DCE'd, which has no
actual impact because InstructionSelect performs DCE as well.

AArch64 CTMark O0:
-0.9% compile-time (instruction count)
no impact on size..text
---
 .../llvm/CodeGen/GlobalISel/CombinerInfo.h    |  5 ++
 llvm/lib/CodeGen/GlobalISel/Combiner.cpp      |  2 +-
 .../GISel/AArch64O0PreLegalizerCombiner.cpp   |  2 +
 .../AArch64/GlobalISel/localizer-arm64-tti.ll | 54 ++++++++++---------
 4 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 13a8faf955a7e..63cbbb41dedaf 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -49,6 +49,11 @@ struct CombinerInfo {
   bool EnableOptSize;
   /// Whether we're optimizing for minsize (-Oz).
   bool EnableMinSize;
+
+  /// Whether the Combiner repeatedly iterates over all instructions until no
+  /// combine can be applied. Disabling this improves compile-time, but the IR
+  /// might not get transformed completely.
+  bool EnableFixedPointIteration = true;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index d18e65a83484f..745d355db2df1 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -166,7 +166,7 @@ bool Combiner::combineMachineInstrs() {
       WLObserver->reportFullyCreatedInstrs();
     }
     MFChanged |= Changed;
-  } while (Changed);
+  } while (Changed && CInfo.EnableFixedPointIteration);
 
 #ifndef NDEBUG
   if (CSEInfo) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 17dd8f2314a2b..83e4b023673b4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -165,6 +165,8 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
   CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                      /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
                      F.hasOptSize(), F.hasMinSize());
+  CInfo.EnableFixedPointIteration = false;
+
   AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
                                          /*CSEInfo*/ nullptr, RuleConfig, ST);
   return Impl.combineMachineInstrs();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
index 5ab086ffd2c13..c4e07de265edd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
@@ -28,6 +28,7 @@ define i32 @foo() {
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
   ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT:   G_BRCOND [[ICMP]](s1), %bb.3
   ; CHECK-NEXT:   G_BR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -35,19 +36,19 @@ define i32 @foo() {
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
-  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-  ; CHECK-NEXT:   G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2)
-  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+  ; CHECK-NEXT:   G_STORE [[C5]](s32), [[GV3]](p0) :: (store (s32) into @var2)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
   ; CHECK-NEXT:   [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
-  ; CHECK-NEXT:   G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+  ; CHECK-NEXT:   G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
   ; CHECK-NEXT:   [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
-  ; CHECK-NEXT:   G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3)
-  ; CHECK-NEXT:   G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+  ; CHECK-NEXT:   G_STORE [[C5]](s32), [[GV5]](p0) :: (store (s32) into @var3)
+  ; CHECK-NEXT:   G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
   ; CHECK-NEXT:   G_BR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.if.end:
-  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT:   $w0 = COPY [[C6]](s32)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   $w0 = COPY [[C7]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %0 = load i32, ptr @var1, align 4
@@ -84,6 +85,7 @@ define i32 @darwin_tls() {
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]]
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT:   G_BRCOND [[ICMP]](s1), %bb.3
   ; CHECK-NEXT:   G_BR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -96,8 +98,8 @@ define i32 @darwin_tls() {
   ; CHECK-NEXT:   G_BR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.if.end:
-  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT:   $w0 = COPY [[C2]](s32)
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   $w0 = COPY [[C3]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %0 = load i32, ptr @var1, align 4
@@ -127,6 +129,7 @@ define i32 @imm_cost_too_large_cost_of_2() {
   ; CHECK-NEXT:   [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT:   G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT:   G_BR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -147,8 +150,8 @@ define i32 @imm_cost_too_large_cost_of_2() {
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT:   [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
   ; CHECK-NEXT:   G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3)
-  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT:   $w0 = COPY [[C3]](s32)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   $w0 = COPY [[C4]](s32)
   ; CHECK-NEXT:   RET_ReallyLR implicit $w0
 entry:
   %0 = load i32, ptr @var1, align 4
@@ -183,6 +186,7 @@ define i64 @imm_cost_too_large_cost_of_4() {
   ; CHECK-NEXT:   [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT:   G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT:   G_BR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -203,8 +207,8 @@ define i64 @imm_cost_too_large_cost_of_4() {
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT:   [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
   ; CHECK-NEXT:   G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
-  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   $x0 = COPY [[C3]](s64)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   $x0 = COPY [[C4]](s64)
   ; CHECK-NEXT:   RET_ReallyLR implicit $x0
 entry:
   %0 = load i64, ptr @var1_64, align 4
@@ -239,6 +243,7 @@ define i64 @f64_imm_cost_too_high(double %a) {
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT:   G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT:   G_BR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -259,8 +264,8 @@ define i64 @f64_imm_cost_too_high(double %a) {
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT:   [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
   ; CHECK-NEXT:   G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
-  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   $x0 = COPY [[C3]](s64)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   $x0 = COPY [[C4]](s64)
   ; CHECK-NEXT:   RET_ReallyLR implicit $x0
 entry:
   %0 = load i64, ptr @var1_64, align 4
@@ -294,6 +299,7 @@ define i64 @f64_imm_cheap(double %a) {
   ; CHECK-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
   ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
   ; CHECK-NEXT:   [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; CHECK-NEXT:   G_BRCOND [[ICMP]](s1), %bb.4
   ; CHECK-NEXT:   G_BR %bb.2
   ; CHECK-NEXT: {{  $}}
@@ -301,24 +307,24 @@ define i64 @f64_imm_cheap(double %a) {
   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
-  ; CHECK-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
-  ; CHECK-NEXT:   G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
+  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+  ; CHECK-NEXT:   G_STORE [[C4]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
   ; CHECK-NEXT:   G_BR %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.if.then2:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
   ; CHECK-NEXT:   [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
-  ; CHECK-NEXT:   G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
+  ; CHECK-NEXT:   G_STORE [[C5]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
   ; CHECK-NEXT:   G_BR %bb.4
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.if.end:
   ; CHECK-NEXT:   [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
-  ; CHECK-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
-  ; CHECK-NEXT:   G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
-  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   $x0 = COPY [[C6]](s64)
+  ; CHECK-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+  ; CHECK-NEXT:   G_STORE [[C6]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
+  ; CHECK-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; CHECK-NEXT:   $x0 = COPY [[C7]](s64)
   ; CHECK-NEXT:   RET_ReallyLR implicit $x0
 entry:
   %0 = load i64, ptr @var1_64, align 4

>From cdcd40b1164238ba7fe5b29a7a6f3e3bde8e0779 Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Wed, 5 Jun 2024 20:34:05 +0200
Subject: [PATCH 2/3] Add iteration counter and statistics

---
 .../llvm/CodeGen/GlobalISel/CombinerInfo.h    |  7 ++--
 llvm/lib/CodeGen/GlobalISel/Combiner.cpp      | 38 ++++++++++++++++++-
 .../GISel/AArch64O0PreLegalizerCombiner.cpp   |  2 +-
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 63cbbb41dedaf..2b0eb71f88082 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -50,10 +50,9 @@ struct CombinerInfo {
   /// Whether we're optimizing for minsize (-Oz).
   bool EnableMinSize;
 
-  /// Whether the Combiner repeatedly iterates over all instructions until no
-  /// combine can be applied. Disabling this improves compile-time, but the IR
-  /// might not get transformed completely.
-  bool EnableFixedPointIteration = true;
+  /// The maximum number of times the Combiner will iterate over the
+  /// MachineFunction. Setting this to 0 enables fixed-point iteration.
+  unsigned MaxIterations = 0;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 745d355db2df1..39df9f5331443 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -13,6 +13,7 @@
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
@@ -27,6 +28,12 @@
 
 using namespace llvm;
 
+STATISTIC(NumOneIteration, "Number of functions with one iteration");
+STATISTIC(NumTwoIterations, "Number of functions with two iterations");
+STATISTIC(NumThreeIterations, "Number of functions with three iterations");
+STATISTIC(NumFourOrMoreIterations,
+          "Number of functions with four or more iterations");
+
 namespace llvm {
 cl::OptionCategory GICombinerOptionCategory(
     "GlobalISel Combiner",
@@ -135,7 +142,11 @@ bool Combiner::combineMachineInstrs() {
   bool MFChanged = false;
   bool Changed;
 
-  do {
+  unsigned Iteration = 0;
+  while (true) {
+    ++Iteration;
+    LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n');
+
     WorkList.clear();
 
     // Collect all instructions. Do a post order traversal for basic blocks and
@@ -166,7 +177,30 @@ bool Combiner::combineMachineInstrs() {
       WLObserver->reportFullyCreatedInstrs();
     }
     MFChanged |= Changed;
-  } while (Changed && CInfo.EnableFixedPointIteration);
+
+    if (!Changed) {
+      LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #"
+                        << Iteration << '\n');
+      break;
+    }
+    // Iterate until a fixed-point is reached if MaxIterations == 0,
+    // otherwise limit the number of iterations.
+    if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) {
+      LLVM_DEBUG(
+          dbgs() << "\nCombiner reached iteration limit after iteration #"
+                 << Iteration << '\n');
+      break;
+    }
+  }
+
+  if (Iteration == 1)
+    ++NumOneIteration;
+  else if (Iteration == 2)
+    ++NumTwoIterations;
+  else if (Iteration == 3)
+    ++NumThreeIterations;
+  else
+    ++NumFourOrMoreIterations;
 
 #ifndef NDEBUG
   if (CSEInfo) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 83e4b023673b4..8c5c94b351c50 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -165,7 +165,7 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
   CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                      /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
                      F.hasOptSize(), F.hasMinSize());
-  CInfo.EnableFixedPointIteration = false;
+  CInfo.MaxIterations = 1;
 
   AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
                                          /*CSEInfo*/ nullptr, RuleConfig, ST);

>From b43a18511b6ccc23a997972ffceafb460b9b6fd3 Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Thu, 6 Jun 2024 22:04:45 +0200
Subject: [PATCH 3/3] Add comment and reduce statistics

---
 llvm/lib/CodeGen/GlobalISel/Combiner.cpp                 | 9 +++------
 .../AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp      | 2 ++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 39df9f5331443..3310ce5455c97 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -30,9 +30,8 @@ using namespace llvm;
 
 STATISTIC(NumOneIteration, "Number of functions with one iteration");
 STATISTIC(NumTwoIterations, "Number of functions with two iterations");
-STATISTIC(NumThreeIterations, "Number of functions with three iterations");
-STATISTIC(NumFourOrMoreIterations,
-          "Number of functions with four or more iterations");
+STATISTIC(NumThreeOrMoreIterations,
+          "Number of functions with three or more iterations");
 
 namespace llvm {
 cl::OptionCategory GICombinerOptionCategory(
@@ -197,10 +196,8 @@ bool Combiner::combineMachineInstrs() {
     ++NumOneIteration;
   else if (Iteration == 2)
     ++NumTwoIterations;
-  else if (Iteration == 3)
-    ++NumThreeIterations;
   else
-    ++NumFourOrMoreIterations;
+    ++NumThreeOrMoreIterations;
 
 #ifndef NDEBUG
   if (CSEInfo) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 8c5c94b351c50..0ba3a543d114a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -165,6 +165,8 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
   CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                      /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
                      F.hasOptSize(), F.hasMinSize());
+  // Disable fixed-point iteration in the Combiner. This improves compile-time
+  // at the cost of possibly missing optimizations. See PR#94291 for details.
   CInfo.MaxIterations = 1;
 
   AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,