[llvm] 9d7ec46 - [AArch64][GlobalISel] Infer whether G_PHI is going to be a FPR in regbankselect
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 28 10:37:19 PDT 2020
Author: Jessica Paquette
Date: 2020-09-28T10:37:09-07:00
New Revision: 9d7ec46f5740d7626171c2b8198f825176991e0a
URL: https://github.com/llvm/llvm-project/commit/9d7ec46f5740d7626171c2b8198f825176991e0a
DIFF: https://github.com/llvm/llvm-project/commit/9d7ec46f5740d7626171c2b8198f825176991e0a.diff
LOG: [AArch64][GlobalISel] Infer whether G_PHI is going to be a FPR in regbankselect
Some instructions (G_LOAD, G_SELECT, G_UNMERGE_VALUES) check if their uses
will define/use FPRs (using `onlyUsesFP` and `onlyDefinesFP`).
The register bank of a use isn't necessarily known when an instruction asks for
this.
Teach `hasFPConstraints` to look at the instructions feeding into a G_PHI when
its destination bank is unknown. If any of them only defines FPRs, assume the
G_PHI itself will also be assigned an FPR.
Since a phi can have many inputs, and those inputs can in turn be phis,
restrict the search depth to a very low number.
Also improve the docs for `hasFPConstraints` and friends a little.
This is a 0.3% code size improvement on CTMark/Bullet at -O3, and a 0.2% code
size improvement on CTMark/pairlocalalign at -O3.
Differential Revision: https://reviews.llvm.org/D88177
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 36db76562e702..cb908a69bd1fa 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -466,9 +466,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}
-bool AArch64RegisterBankInfo::hasFPConstraints(
- const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const {
+bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
unsigned Op = MI.getOpcode();
// Do we have an explicit floating point instruction?
@@ -480,14 +481,30 @@ bool AArch64RegisterBankInfo::hasFPConstraints(
if (Op != TargetOpcode::COPY && !MI.isPHI())
return false;
- // MI is copy-like. Return true if it outputs an FPR.
- return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
- &AArch64::FPRRegBank;
+ // Check if we already know the register bank.
+ auto *RB = getRegBank(MI.getOperand(0).getReg(), MRI, TRI);
+ if (RB == &AArch64::FPRRegBank)
+ return true;
+ if (RB == &AArch64::GPRRegBank)
+ return false;
+
+ // We don't know anything.
+ //
+ // If we have a phi, we may be able to infer that it will be assigned a FPR
+ // based off of its inputs.
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
+ return Op.isReg() &&
+ onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
+ });
}
bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const {
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
@@ -496,12 +513,13 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
default:
break;
}
- return hasFPConstraints(MI, MRI, TRI);
+ return hasFPConstraints(MI, MRI, TRI, Depth);
}
-bool AArch64RegisterBankInfo::onlyDefinesFP(
- const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const {
+bool AArch64RegisterBankInfo::onlyDefinesFP(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth) const {
switch (MI.getOpcode()) {
case AArch64::G_DUP:
case TargetOpcode::G_SITOFP:
@@ -512,7 +530,7 @@ bool AArch64RegisterBankInfo::onlyDefinesFP(
default:
break;
}
- return hasFPConstraints(MI, MRI, TRI);
+ return hasFPConstraints(MI, MRI, TRI, Depth);
}
const RegisterBankInfo::InstructionMapping &
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index e956fca1aa109..019017bc3ec4e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -114,17 +114,20 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
const InstructionMapping &
getSameKindOfOperandsMapping(const MachineInstr &MI) const;
- /// Returns true if the output of \p MI must be stored on a FPR register.
+ /// Maximum recursion depth for hasFPConstraints.
+ const unsigned MaxFPRSearchDepth = 2;
+
+ /// \returns true if \p MI only uses and defines FPRs.
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
- /// Returns true if the source registers of \p MI must all be FPRs.
+ /// \returns true if \p MI only uses FPRs.
bool onlyUsesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
- /// Returns true if the destination register of \p MI must be a FPR.
+ /// \returns true if \p MI only defines FPRs.
bool onlyDefinesFP(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI) const;
+ const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
index 3ca889050d2cc..a7aae275fa5d0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -101,3 +101,263 @@ body: |
%4:_(s32) = G_SITOFP %2
%6:_(s32) = G_SELECT %1(s1), %3, %4
%8:_(s32) = G_FPTOSI %6
+
+...
+---
+name: load_used_by_phi_fpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: load_used_by_phi_fpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
+ ; CHECK: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+ ; CHECK: %fpr_copy:fpr(s32) = COPY $s0
+ ; CHECK: %ptr:gpr(p0) = COPY $x0
+ ; CHECK: G_BRCOND %cond(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1
+ ; CHECK: $s0 = COPY %phi(s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $x0, $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %cond:_(s1) = G_TRUNC %cond_wide(s32)
+ %fpr_copy:_(s32) = COPY $s0
+ %ptr:_(p0) = COPY $x0
+ G_BRCOND %cond(s1), %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %load:_(s32) = G_LOAD %ptr(p0) :: (load 4)
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %load(s32), %bb.1
+ $s0 = COPY %phi(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: load_used_by_phi_gpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: load_used_by_phi_gpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
+ ; CHECK: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+ ; CHECK: %gpr_copy:gpr(s32) = COPY $w1
+ ; CHECK: %ptr:gpr(p0) = COPY $x0
+ ; CHECK: G_BRCOND %cond(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+ ; CHECK: $s0 = COPY %phi(s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $x0, $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %cond:_(s1) = G_TRUNC %cond_wide(s32)
+ %gpr_copy:_(s32) = COPY $w1
+ %ptr:_(p0) = COPY $x0
+ G_BRCOND %cond(s1), %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %load:_(s32) = G_LOAD %ptr(p0) :: (load 4)
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+ $s0 = COPY %phi(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: select_used_by_phi_fpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: select_used_by_phi_fpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $s0, $s1, $w0, $w1
+ ; CHECK: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+ ; CHECK: %fpr_copy:fpr(s32) = COPY $s0
+ ; CHECK: %gpr_copy:gpr(s32) = COPY $w1
+ ; CHECK: G_BRCOND %cond(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %gpr_copy(s32)
+ ; CHECK: %select:fpr(s32) = G_SELECT %cond(s1), %fpr_copy, [[COPY]]
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %select(s32), %bb.1
+ ; CHECK: $w0 = COPY %phi(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ ; The G_SELECT and G_PHI should end up with the same register bank.
+ ;
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %cond:_(s1) = G_TRUNC %cond_wide(s32)
+ %fpr_copy:_(s32) = COPY $s0
+ %gpr_copy:_(s32) = COPY $w1
+ G_BRCOND %cond(s1), %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %select:_(s32) = G_SELECT %cond(s1), %fpr_copy, %gpr_copy
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %select(s32), %bb.1
+ $w0 = COPY %phi(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: select_used_by_phi_gpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: select_used_by_phi_gpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $s0, $s1, $w0, $w1
+ ; CHECK: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+ ; CHECK: %fpr_copy:fpr(s32) = COPY $s0
+ ; CHECK: %gpr_copy:gpr(s32) = COPY $w1
+ ; CHECK: G_BRCOND %cond(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %fpr_copy(s32)
+ ; CHECK: %select:gpr(s32) = G_SELECT %cond(s1), [[COPY]], %gpr_copy
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %select(s32), %bb.1
+ ; CHECK: $s0 = COPY %phi(s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ ; The G_SELECT and G_PHI should end up with the same register bank.
+ ;
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %cond:_(s1) = G_TRUNC %cond_wide(s32)
+ %fpr_copy:_(s32) = COPY $s0
+ %gpr_copy:_(s32) = COPY $w1
+ G_BRCOND %cond(s1), %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %select:_(s32) = G_SELECT %cond(s1), %fpr_copy, %gpr_copy
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %select(s32), %bb.1
+ $s0 = COPY %phi(s32)
+ RET_ReallyLR implicit $s0
+
+
+...
+---
+name: unmerge_used_by_phi_fpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: unmerge_used_by_phi_fpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
+ ; CHECK: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+ ; CHECK: %fpr_copy:fpr(s32) = COPY $s0
+ ; CHECK: %unmerge_src:gpr(s64) = COPY $x0
+ ; CHECK: G_BRCOND %cond(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY %unmerge_src(s64)
+ ; CHECK: %unmerge_1:fpr(s32), %unmerge_2:fpr(s32) = G_UNMERGE_VALUES [[COPY]](s64)
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: %phi:fpr(s32) = G_PHI %fpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
+ ; CHECK: $s0 = COPY %phi(s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $x0, $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %cond:_(s1) = G_TRUNC %cond_wide(s32)
+ %fpr_copy:_(s32) = COPY $s0
+ %unmerge_src:_(s64) = COPY $x0
+ G_BRCOND %cond(s1), %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %unmerge_1:_(s32), %unmerge_2:_(s32) = G_UNMERGE_VALUES %unmerge_src(s64)
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %fpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
+ $s0 = COPY %phi(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: unmerge_used_by_phi_gpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: unmerge_used_by_phi_gpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $x0, $s0, $s1, $w0, $w1
+ ; CHECK: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK: %cond:gpr(s1) = G_TRUNC %cond_wide(s32)
+ ; CHECK: %gpr_copy:gpr(s32) = COPY $w1
+ ; CHECK: %unmerge_src:gpr(s64) = COPY $x0
+ ; CHECK: G_BRCOND %cond(s1), %bb.1
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: %unmerge_1:gpr(s32), %unmerge_2:gpr(s32) = G_UNMERGE_VALUES %unmerge_src(s64)
+ ; CHECK: G_BR %bb.2
+ ; CHECK: bb.2:
+ ; CHECK: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
+ ; CHECK: $s0 = COPY %phi(s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $x0, $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %cond:_(s1) = G_TRUNC %cond_wide(s32)
+ %gpr_copy:_(s32) = COPY $w1
+ %unmerge_src:_(s64) = COPY $x0
+ G_BRCOND %cond(s1), %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %unmerge_1:_(s32), %unmerge_2:_(s32) = G_UNMERGE_VALUES %unmerge_src(s64)
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %unmerge_1(s32), %bb.1
+ $s0 = COPY %phi(s32)
+ RET_ReallyLR implicit $s0
More information about the llvm-commits
mailing list