[llvm] [AArch64][GISel] Assign registers into FPR if they feed into FP instructions indirectly via PHI (PR #94618)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 11 01:20:22 PDT 2024
https://github.com/Him188 updated https://github.com/llvm/llvm-project/pull/94618
>From 1b3e8d9867129abb09f0bda64102686c6a179943 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Mon, 3 Jun 2024 13:36:38 +0100
Subject: [PATCH 1/2] [AArch64][GISel] Assign registers into FPR if they feed
into FP instructions indirectly via G_PHI.
This change helps RBS to assign registers into FPR if they are later on used in an FP
instruction **indirectly via PHI**, to avoid unnecessary copies. Before this patch, we only considered direct usages.
Added an MIFlag DefinesFP, which is set in IRTranslator for MI that defines an FP register. Currently, we do this only for PHI.
This is a temporary solution to carry information about floating types to the regbank selection pass, before we officially implement this, e.g. in LLT.
---
llvm/include/llvm/CodeGen/MachineInstr.h | 1 +
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 3 +++
.../Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 12 ++++++++++++
3 files changed, 16 insertions(+)
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index db48a0ae55145..e723598997c16 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -117,6 +117,7 @@ class MachineInstr
NoConvergent = 1 << 17, // Call does not require convergence guarantees.
NonNeg = 1 << 18, // The operand is non-negative.
Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs.
+ DefinesFP = 1 << 20, // Defines floating-point value
};
private:
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 5289b993476db..761a817ce5b19 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3174,6 +3174,9 @@ bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
SmallVector<MachineInstr *, 4> Insts;
for (auto Reg : getOrCreateVRegs(PI)) {
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
+ if (PI.getType()->isFloatingPointTy())
+ MIB.setMIFlag(MachineInstr::DefinesFP);
+
Insts.push_back(MIB.getInstr());
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 7785e020eaaf1..20809c011020f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -527,6 +527,18 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
return false;
+ if (MI.getNumDefs() == 1 && MI.getFlag(MachineInstr::DefinesFP)) {
+ // If the instruction defines an FP register, e.g.
+ // %4 = phi float [ %2, %1 ], [ %7, %3 ]
+ // and the defined register %4 is used by any instruction that only takes
+ // FP operand, then assign it to FPR.
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ return onlyUsesFP(UseMI, MRI, TRI, Depth + 1);
+ }))
+ return true;
+ }
+
return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
return Op.isReg() &&
onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
>From eae71431f75b4f42ad48d41723b439cfa29d4747 Mon Sep 17 00:00:00 2001
From: Tianyi Guan <tguan at nvidia.com>
Date: Thu, 6 Jun 2024 15:24:19 +0100
Subject: [PATCH 2/2] Remove flag and do hoisting in RBS only for G_LOAD
---
llvm/include/llvm/CodeGen/MachineInstr.h | 1 -
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 3 --
.../AArch64/GISel/AArch64RegisterBankInfo.cpp | 35 +++++++++-----
.../AArch64/GISel/AArch64RegisterBankInfo.h | 7 +++
.../AArch64/GlobalISel/regbank-fp-use-def.mir | 48 ++++++++++++++++++-
5 files changed, 75 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index e723598997c16..db48a0ae55145 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -117,7 +117,6 @@ class MachineInstr
NoConvergent = 1 << 17, // Call does not require convergence guarantees.
NonNeg = 1 << 18, // The operand is non-negative.
Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs.
- DefinesFP = 1 << 20, // Defines floating-point value
};
private:
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 761a817ce5b19..5289b993476db 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3174,9 +3174,6 @@ bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
SmallVector<MachineInstr *, 4> Insts;
for (auto Reg : getOrCreateVRegs(PI)) {
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
- if (PI.getType()->isFloatingPointTy())
- MIB.setMIFlag(MachineInstr::DefinesFP);
-
Insts.push_back(MIB.getInstr());
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 20809c011020f..987d7ffe7504d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -495,6 +495,22 @@ static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
}
}
+bool AArch64RegisterBankInfo::isPHIWithFPContraints(
+ const MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI, const unsigned Depth) const {
+ if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
+ return false;
+
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ if (onlyUsesFP(UseMI, MRI, TRI, Depth + 1))
+ return true;
+ return isPHIWithFPContraints(UseMI, MRI, TRI, Depth + 1);
+ }))
+ return true;
+ return false;
+}
+
bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
@@ -527,18 +543,6 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
if (!MI.isPHI() || Depth > MaxFPRSearchDepth)
return false;
- if (MI.getNumDefs() == 1 && MI.getFlag(MachineInstr::DefinesFP)) {
- // If the instruction defines an FP register, e.g.
- // %4 = phi float [ %2, %1 ], [ %7, %3 ]
- // and the defined register %4 is used by any instruction that only takes
- // FP operand, then assign it to FPR.
- if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
- [&](const MachineInstr &UseMI) {
- return onlyUsesFP(UseMI, MRI, TRI, Depth + 1);
- }))
- return true;
- }
-
return any_of(MI.explicit_uses(), [&](const MachineOperand &Op) {
return Op.isReg() &&
onlyDefinesFP(*MRI.getVRegDef(Op.getReg()), MRI, TRI, Depth + 1);
@@ -859,13 +863,18 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// instead of blind map every scalar to GPR.
if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
[&](const MachineInstr &UseMI) {
- // If we have at least one direct use in a FP instruction,
+ // If we have at least one direct or indirect use
+ // in a FP instruction,
// assume this was a floating point load in the IR. If it was
// not, we would have had a bitcast before reaching that
// instruction.
//
// Int->FP conversion operations are also captured in
// onlyDefinesFP().
+
+ if (isPHIWithFPContraints(UseMI, MRI, TRI))
+ return true;
+
return onlyUsesFP(UseMI, MRI, TRI) ||
onlyDefinesFP(UseMI, MRI, TRI);
}))
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
index b6364c6a64099..9d23f8480f779 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h
@@ -120,6 +120,13 @@ class AArch64RegisterBankInfo final : public AArch64GenRegisterBankInfo {
/// Maximum recursion depth for hasFPConstraints.
const unsigned MaxFPRSearchDepth = 2;
+ /// \returns true if \p MI is a PHI that its def is used by
+ /// any instruction that onlyUsesFP.
+ bool isPHIWithFPContraints(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ unsigned Depth = 0) const;
+
/// \returns true if \p MI only uses and defines FPRs.
bool hasFPConstraints(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI, unsigned Depth = 0) const;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
index eb768f3b16da3..4cdefa80b309b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir
@@ -150,7 +150,7 @@ body: |
...
---
-name: load_used_by_phi_gpr
+name: load_used_by_phi_gpr_copy_gpr
legalized: true
tracksRegLiveness: true
body: |
@@ -173,6 +173,50 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+ ; CHECK-NEXT: $w2 = COPY %phi(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w2
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $x0, $s0, $s1, $w0, $w1
+ %cond_wide:_(s32) = COPY $w0
+ %gpr_copy:_(s32) = COPY $w1
+ %ptr:_(p0) = COPY $x0
+ G_BRCOND %cond_wide, %bb.1
+ G_BR %bb.2
+ bb.1:
+ successors: %bb.2
+ %load:_(s32) = G_LOAD %ptr(p0) :: (load (s32))
+ G_BR %bb.2
+ bb.2:
+ %phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
+ $w2 = COPY %phi(s32)
+ RET_ReallyLR implicit $w2
+
+...
+---
+name: load_used_by_phi_gpr_copy_fpr
+legalized: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: load_used_by_phi_gpr
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: liveins: $x0, $s0, $s1, $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %cond_wide:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: %gpr_copy:gpr(s32) = COPY $w1
+ ; CHECK-NEXT: %ptr:gpr(p0) = COPY $x0
+ ; CHECK-NEXT: G_BRCOND %cond_wide(s32), %bb.1
+ ; CHECK-NEXT: G_BR %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %load:fpr(s32) = G_LOAD %ptr(p0) :: (load (s32))
+ ; CHECK-NEXT: G_BR %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: %phi:gpr(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
; CHECK-NEXT: $s0 = COPY %phi(s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
bb.0:
@@ -189,7 +233,7 @@ body: |
G_BR %bb.2
bb.2:
%phi:_(s32) = G_PHI %gpr_copy(s32), %bb.0, %load(s32), %bb.1
- $s0 = COPY %phi(s32)
+ $s0 = COPY %phi(s32) ; G_LOAD should consider this FPR constraint and assign %load FPR
RET_ReallyLR implicit $s0
...
More information about the llvm-commits
mailing list