[llvm] 974cf71 - [AArch64][GlobalISel] Add a simple cross-regclass copy optimization post-selection.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 1 16:09:28 PDT 2022
Author: Amara Emerson
Date: 2022-11-01T16:09:21-07:00
New Revision: 974cf71649150c3da9b7590a2ade2a49bc197f4c
URL: https://github.com/llvm/llvm-project/commit/974cf71649150c3da9b7590a2ade2a49bc197f4c
DIFF: https://github.com/llvm/llvm-project/commit/974cf71649150c3da9b7590a2ade2a49bc197f4c.diff
LOG: [AArch64][GlobalISel] Add a simple cross-regclass copy optimization post-selection.
This does some trivial cross-regclass copy folding: we either constrain the source register to the destination's (smaller) class so the copy can be eliminated, or, in the inverse case, modify uses of the copy to use the smaller source class directly.
There are minor code size improvements on average.
Program                               size.__text
                                      before        after        diff
tramp3d-v4/tramp3d-v4                 366000.00     366012.00     0.0%
mafft/pairlocalalign                  248196.00     248188.00    -0.0%
7zip/7zip-benchmark                   568612.00     568592.00    -0.0%
kimwitu++/kc                          434704.00     434676.00    -0.0%
Bullet/bullet                         456128.00     456096.00    -0.0%
sqlite3/sqlite3                       284136.00     284100.00    -0.0%
ClamAV/clamscan                       381492.00     381396.00    -0.0%
SPASS/SPASS                           412052.00     411944.00    -0.0%
lencod/lencod                         428060.00     427912.00    -0.0%
consumer-typeset/consumer-typeset     413148.00     411116.00    -0.5%
Geomean difference                                               -0.1%
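To illustrate the first case (constraining the def), here is a sketch abbreviated from the copy_from_larger_rc_def test added below; the opcodes and register classes are simply what that test happens to use. Before:

  %def:gpr64 = UBFMXri %0, 61, 60
  %copy:gpr64common = COPY %def
  %add:gpr64sp = ADDXri %copy, 3, 0

After: since the COPY is the only user of %def and gpr64common is a subclass of gpr64, the def is constrained to gpr64common and the COPY is erased:

  %def:gpr64common = UBFMXri %0, 61, 60
  %add:gpr64sp = ADDXri %def, 3, 0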
Differential Revision: https://reviews.llvm.org/D136793
Added:
llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
Modified:
llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index ce6f15a799b72..670a16209705c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -14,12 +14,15 @@
#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#define DEBUG_TYPE "aarch64-post-select-optimize"
@@ -42,6 +45,9 @@ class AArch64PostSelectOptimize : public MachineFunctionPass {
private:
bool optimizeNZCVDefs(MachineBasicBlock &MBB);
+ bool doPeepholeOpts(MachineBasicBlock &MBB);
+ /// Look for cross regclass copies that can be trivially eliminated.
+ bool foldSimpleCrossClassCopies(MachineInstr &MI);
};
} // end anonymous namespace
@@ -74,6 +80,62 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
}
}
+bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (auto &MI : make_early_inc_range(make_range(MBB.begin(), MBB.end()))) {
+ Changed |= foldSimpleCrossClassCopies(MI);
+ }
+ return Changed;
+}
+
+bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
+ auto *MF = MI.getMF();
+ auto &MRI = MF->getRegInfo();
+
+ if (!MI.isCopy())
+ return false;
+
+ if (MI.getOperand(1).getSubReg())
+ return false; // Don't deal with subreg copies
+
+ Register Src = MI.getOperand(1).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+
+ if (Src.isPhysical() || Dst.isPhysical())
+ return false;
+
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+
+ if (SrcRC == DstRC)
+ return false;
+
+
+ if (SrcRC->hasSubClass(DstRC)) {
+ // This is the case where the source class is a superclass of the dest, so
+ // if the copy is the only user of the source, we can just constrain the
+ // source reg to the dest class.
+
+ if (!MRI.hasOneNonDBGUse(Src))
+ return false; // Only constrain single uses of the source.
+
+ // Constrain to dst reg class as long as it's not a weird class that only
+ // has a few registers.
+ if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
+ return false;
+ } else if (DstRC->hasSubClass(SrcRC)) {
+ // This is the inverse case, where the destination class is a superclass of
+ // the source. Here, if the copy is the only user, we can just constrain
+ // the user of the copy to use the smaller class of the source.
+ } else {
+ return false;
+ }
+
+ MRI.replaceRegWith(Dst, Src);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
// Consider the following code:
// FCMPSrr %0, %1, implicit-def $nzcv
@@ -178,8 +240,10 @@ bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
"Expected a selected MF");
bool Changed = false;
- for (auto &BB : MF)
+ for (auto &BB : MF) {
Changed |= optimizeNZCVDefs(BB);
+ Changed |= doPeepholeOpts(BB);
+ }
return Changed;
}
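To illustrate the second case handled above (the copy's destination class is a superclass of the source class), no constraining is needed: any user that accepts a gpr64sp register also accepts a register from its gpr64common subclass, so the pass simply rewrites users of the copy to use the source register and erases the COPY. A sketch abbreviated from the copy_from_smaller_rc_def test in the new file below. Before:

  %1:gpr64common = COPY $x1
  %add:gpr64common = ADDXri %1, 3, 0
  %copy:gpr64sp = COPY %add
  STRXui %1, %copy, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))

After:

  %1:gpr64common = COPY $x1
  %add:gpr64common = ADDXri %1, 3, 0
  STRXui %1, %add, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))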
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
new file mode 100644
index 0000000000000..ea9c3881c6c3b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
@@ -0,0 +1,116 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+ @x = external hidden local_unnamed_addr global i32*, align 8
+ define void @copy_from_larger_rc_def() { ret void }
+ define void @copy_from_larger_rc_def_multi_use() { ret void }
+ define void @copy_from_smaller_rc_def() { ret void }
+
+...
+---
+name: copy_from_larger_rc_def
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+ - { reg: '$x2' }
+body: |
+ bb.1:
+ liveins: $w1, $x0, $x2
+
+ ; Show that if we're doing a copy from a large rc to a single user with a smaller rc
+ ; then we just constrain the def instead.
+ ; CHECK-LABEL: name: copy_from_larger_rc_def
+ ; CHECK: liveins: $w1, $x0, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: %large_rc_def:gpr64common = UBFMXri [[COPY]], 61, 60
+ ; CHECK-NEXT: %add:gpr64sp = ADDXri %large_rc_def, 3, 0
+ ; CHECK-NEXT: $x0 = COPY %add
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64 = COPY $x0
+ %large_rc_def:gpr64 = UBFMXri %0, 61, 60
+ %constrain_copy:gpr64common = COPY %large_rc_def
+ ; Even though ADDXri may not actually need to use gpr64common, just use it as an example.
+ %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+ $x0 = COPY %add
+ RET_ReallyLR
+
+...
+---
+name: copy_from_larger_rc_def_multi_use
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+ - { reg: '$x2' }
+body: |
+ bb.1:
+ liveins: $w1, $x0, $x2
+
+ ; Don't constrain def if the original def has multiple users.
+ ; CHECK-LABEL: name: copy_from_larger_rc_def_multi_use
+ ; CHECK: liveins: $w1, $x0, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: %large_rc_def:gpr64 = UBFMXri [[COPY]], 61, 60
+ ; CHECK-NEXT: %constrain_copy:gpr64common = COPY %large_rc_def
+ ; CHECK-NEXT: %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+ ; CHECK-NEXT: %add2:gpr64sp = ADDXri %constrain_copy, 3, 0
+ ; CHECK-NEXT: $x0 = COPY %add
+ ; CHECK-NEXT: $x1 = COPY %large_rc_def
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64 = COPY $x0
+ %large_rc_def:gpr64 = UBFMXri %0, 61, 60
+ %constrain_copy:gpr64common = COPY %large_rc_def
+ %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+ %add2:gpr64sp = ADDXri %constrain_copy, 3, 0
+ $x0 = COPY %add
+ $x1 = COPY %large_rc_def
+ RET_ReallyLR
+
+...
+---
+name: copy_from_smaller_rc_def
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+ - { reg: '$x2' }
+body: |
+ bb.1:
+ liveins: $w1, $x0, $x2
+
+ ; Show that if we're doing a copy from a small rc to a single user with a larger rc
+ ; then we just use the smaller def instead of doing a copy.
+ ; CHECK-LABEL: name: copy_from_smaller_rc_def
+ ; CHECK: liveins: $w1, $x0, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: %add:gpr64common = ADDXri [[COPY1]], 3, 0
+ ; CHECK-NEXT: STRXui [[COPY1]], %add, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64common = COPY $x0
+ %1:gpr64common = COPY $x1
+
+ %add:gpr64common = ADDXri %1, 3, 0
+ %copy:gpr64sp = COPY %add
+ STRXui %1, %copy, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
+ RET_ReallyLR
+
+...