[llvm] [AMDGPU][GlobalISel] Disable fixed-point iteration in all Combiners (PR #105517)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 05:55:55 PDT 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/105517
Disable fixed-point iteration in all AMDGPU Combiners after #102163.
This saves around 2% compile time in ad hoc testing on some large
graphics shaders. I did not notice any regressions in the generated
code, just a bunch of harmless differences in instruction selection and
register allocation.
From 161cda1bee004528252661d8bf6d521d3815b993 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 21 Aug 2024 13:51:44 +0100
Subject: [PATCH] [AMDGPU][GlobalISel] Disable fixed-point iteration in all
Combiners
Disable fixed-point iteration in all AMDGPU Combiners after #102163.
This saves around 2% compile time in ad hoc testing on some large
graphics shaders. I did not notice any regressions in the generated
code, just a bunch of harmless differences in instruction selection and
register allocation.
---
llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 6 +++++-
llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 6 ++++++
llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp | 6 ++++++
.../AMDGPU/GlobalISel/postlegalizercombiner-and.mir | 8 ++++++++
4 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index cfe9f33efc91b8..54d927c33fc553 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -499,7 +499,11 @@ bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
-
+ // Disable fixed-point iteration to reduce compile-time
+ CInfo.MaxIterations = 1;
+ CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
+ // Legalizer performs DCE, so a full DCE pass is unnecessary.
+ CInfo.EnableFullDCE = false;
AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
RuleConfig, ST, MDT, LI);
return Impl.combineMachineInstrs();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 4d0cb467ba374d..ff8189ce31f7f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -276,6 +276,12 @@ bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
: &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize());
+ // Disable fixed-point iteration to reduce compile-time
+ CInfo.MaxIterations = 1;
+ CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
+ // This is the first Combiner, so the input IR might contain dead
+ // instructions.
+ CInfo.EnableFullDCE = true;
AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig,
STI, MDT, STI.getLegalizerInfo());
return Impl.combineMachineInstrs();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 74f0540239c939..e236a5d7522e02 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -454,6 +454,12 @@ bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
LI, EnableOpt, F.hasOptSize(), F.hasMinSize());
+ // Disable fixed-point iteration to reduce compile-time
+ CInfo.MaxIterations = 1;
+ CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
+ // RegBankSelect seems not to leave dead instructions, so a full DCE pass is
+ // unnecessary.
+ CInfo.EnableFullDCE = false;
AMDGPURegBankCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
RuleConfig, ST, MDT, LI);
return Impl.combineMachineInstrs();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
index 67e6de1ce76449..fdc22a23f74163 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir
@@ -203,6 +203,7 @@ body: |
; CHECK-LABEL: name: remove_and_65535_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask
@@ -225,6 +226,7 @@ body: |
; CHECK-LABEL: name: remove_and_131071_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK-NEXT: $vgpr0 = COPY %lds_size(s32)
%ptr:_(p1) = COPY $vgpr0_vgpr1
@@ -245,6 +247,7 @@ body: |
; CHECK-LABEL: name: no_remove_and_65536_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 65536
; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask
@@ -267,6 +270,7 @@ body: |
; CHECK-LABEL: name: no_remove_and_32767_groupstaticsize
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
; CHECK-NEXT: %lds_size:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 32767
; CHECK-NEXT: %and:_(s32) = G_AND %lds_size, %mask
@@ -291,6 +295,8 @@ body: |
; CHECK-LABEL: name: remove_and_umin_lhs_only
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3
; CHECK-NEXT: %val:_(s32) = COPY $vgpr4
; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255
@@ -316,6 +322,8 @@ body: |
; CHECK-LABEL: name: remove_and_umin_rhs_only
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr0:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %ptr1:_(p1) = COPY $vgpr2_vgpr3
; CHECK-NEXT: %val:_(s32) = COPY $vgpr4
; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255
More information about the llvm-commits
mailing list