[llvm] [GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable fixed-point iteration (PR #94291)
Tobias Stadler via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 16:21:57 PDT 2024
https://github.com/tobias-stadler created https://github.com/llvm/llvm-project/pull/94291
This adds an option to CombinerInfo to turn off the fixed-point iteration in the Combiner. This option is then used for AArch64O0PreLegalizerCombiner. The combines there are simple enough that the code-quality impact should be minimal with the current heuristics (instructions are processed from top to bottom of each basic block, and new or changed instructions are added back to the worklist). The test changes are due to some instructions no longer being removed by dead-code elimination (DCE), which has no actual impact because InstructionSelect performs DCE as well.
AArch64 CTMark O0:
-0.9% geomean compile-time (instruction count), no regressions
no change in the size..text metric (.text section size) for any of the benchmarks
From 3cdefa8af1199da369104744c0429667deb4cb3f Mon Sep 17 00:00:00 2001
From: Tobias Stadler <mail at stadler-tobias.de>
Date: Tue, 4 Jun 2024 00:09:11 +0200
Subject: [PATCH] [GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable
fixed-point iteration
This adds an option to CombinerInfo to turn off the fixed-point
iteration in the Combiner. This option is then used for
AArch64O0PreLegalizerCombiner. The combines there are simple enough that
code quality impact should be minimal with the current heuristics
(instructions are processed from top to bottom of the basic block,
new/changed instructions are added back to the worklist).
Test changes are due to some instructions not being DCE'd, which has no
actual impact because InstructionSelect performs DCE as well.
AArch64 CTMark O0:
-0.9% compile-time (instruction count)
no impact on size..text
---
.../llvm/CodeGen/GlobalISel/CombinerInfo.h | 5 ++
llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 2 +-
.../GISel/AArch64O0PreLegalizerCombiner.cpp | 2 +
.../AArch64/GlobalISel/localizer-arm64-tti.ll | 54 ++++++++++---------
4 files changed, 38 insertions(+), 25 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
index 13a8faf955a7e..63cbbb41dedaf 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h
@@ -49,6 +49,11 @@ struct CombinerInfo {
bool EnableOptSize;
/// Whether we're optimizing for minsize (-Oz).
bool EnableMinSize;
+
+ /// Whether the Combiner repeatedly iterates over all instructions until no
+ /// combine can be applied. Disabling this improves compile-time, but the IR
+ /// might not get transformed completely.
+ bool EnableFixedPointIteration = true;
};
} // namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index d18e65a83484f..745d355db2df1 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -166,7 +166,7 @@ bool Combiner::combineMachineInstrs() {
WLObserver->reportFullyCreatedInstrs();
}
MFChanged |= Changed;
- } while (Changed);
+ } while (Changed && CInfo.EnableFixedPointIteration);
#ifndef NDEBUG
if (CSEInfo) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 17dd8f2314a2b..83e4b023673b4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -165,6 +165,8 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, /*EnableOpt*/ false,
F.hasOptSize(), F.hasMinSize());
+ CInfo.EnableFixedPointIteration = false;
+
AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB,
/*CSEInfo*/ nullptr, RuleConfig, ST);
return Impl.combineMachineInstrs();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
index 5ab086ffd2c13..c4e07de265edd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll
@@ -28,6 +28,7 @@ define i32 @foo() {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -35,19 +36,19 @@ define i32 @foo() {
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2)
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV3]](p0) :: (store (s32) into @var2)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1
- ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+ ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
- ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3)
- ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1)
+ ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV5]](p0) :: (store (s32) into @var3)
+ ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C6]](s32)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[C7]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
@@ -84,6 +85,7 @@ define i32 @darwin_tls() {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]]
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -96,8 +98,8 @@ define i32 @darwin_tls() {
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.end:
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[C3]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
@@ -127,6 +129,7 @@ define i32 @imm_cost_too_large_cost_of_2() {
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -147,8 +150,8 @@ define i32 @imm_cost_too_large_cost_of_2() {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C3]](s32)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: $w0 = COPY [[C4]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%0 = load i32, ptr @var1, align 4
@@ -183,6 +186,7 @@ define i64 @imm_cost_too_large_cost_of_4() {
; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]]
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -203,8 +207,8 @@ define i64 @imm_cost_too_large_cost_of_4() {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: $x0 = COPY [[C3]](s64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C4]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
@@ -239,6 +243,7 @@ define i64 @f64_imm_cost_too_high(double %a) {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -259,8 +264,8 @@ define i64 @f64_imm_cost_too_high(double %a) {
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: $x0 = COPY [[C3]](s64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C4]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
@@ -294,6 +299,7 @@ define i64 @f64_imm_cheap(double %a) {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
@@ -301,24 +307,24 @@ define i64 @f64_imm_cheap(double %a) {
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
- ; CHECK-NEXT: G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+ ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV3]](p0) :: (store (s64) into @var2_64)
; CHECK-NEXT: G_BR %bb.3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.if.then2:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64
- ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
+ ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV4]](p0) :: (store (s64) into @var1_64)
; CHECK-NEXT: G_BR %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.if.end:
; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
- ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: $x0 = COPY [[C6]](s64)
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00
+ ; CHECK-NEXT: G_STORE [[C6]](s64), [[GV5]](p0) :: (store (s64) into @var3_64)
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: $x0 = COPY [[C7]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
entry:
%0 = load i64, ptr @var1_64, align 4
More information about the llvm-commits
mailing list