[llvm] r359940 - [AArch64][GlobalISel] Use fcsel instead of csel for G_SELECT on FPRs
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Fri May 3 15:37:46 PDT 2019
Author: paquette
Date: Fri May 3 15:37:46 2019
New Revision: 359940
URL: http://llvm.org/viewvc/llvm-project?rev=359940&view=rev
Log:
[AArch64][GlobalISel] Use fcsel instead of csel for G_SELECT on FPRs
This saves us some unnecessary copies.
If the inputs to a G_SELECT are floating point, we should use fcsel rather than
csel.
Changes here are...
- Teach selectCopy about s1-to-s1 copies across register banks.
- AArch64RegisterBankInfo about G_SELECT in general.
- Teach the instruction selector about the FCSEL instructions.
Also add two tests:
- select-select.mir to show that we get the expected FCSEL
- regbank-select.mir (unfortunately named) to show the register banks on
G_SELECT are properly preserved
And update fast-isel-select.ll to show that we do the same thing as other
instruction selectors in these cases.
Added:
llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-select.mir
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
llvm/trunk/test/CodeGen/AArch64/fast-isel-select.ll
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp?rev=359940&r1=359939&r2=359940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp Fri May 3 15:37:46 2019
@@ -521,6 +521,36 @@ static bool selectSubregisterCopy(Machin
return true;
}
+/// Helper function to get the source and destination register classes for a
+/// copy. Returns a std::pair containing the source register class for the
+/// copy, and the destination register class for the copy. If a register class
+/// cannot be determined, then it will be nullptr.
+static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
+getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ unsigned DstReg = I.getOperand(0).getReg();
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+ unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+ unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+ // Special casing for cross-bank copies of s1s. We can technically represent
+ // a 1-bit value with any size of register. The minimum size for a GPR is 32
+ // bits. So, we need to put the FPR on 32 bits as well.
+ //
+ // FIXME: I'm not sure if this case holds true outside of copies. If it does,
+ // then we can pull it into the helpers that get the appropriate class for a
+ // register bank. Or make a new helper that carries along some constraint
+ // information.
+ if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+ SrcSize = DstSize = 32;
+
+ return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
+ getMinClassForRegBank(DstRegBank, DstSize, true)};
+}
+
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
@@ -529,8 +559,12 @@ static bool selectCopy(MachineInstr &I,
unsigned SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- const TargetRegisterClass *DstRC = getMinClassForRegBank(
- DstRegBank, RBI.getSizeInBits(DstReg, MRI, TRI), true);
+
+ // Find the correct register classes for the source and destination registers.
+ const TargetRegisterClass *SrcRC;
+ const TargetRegisterClass *DstRC;
+ std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
+
if (!DstRC) {
LLVM_DEBUG(dbgs() << "Unexpected dest size "
<< RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
@@ -563,8 +597,6 @@ static bool selectCopy(MachineInstr &I,
// a SUBREG_TO_REG.
if (I.isCopy()) {
// Yes. Check if there's anything to fix up.
- const TargetRegisterClass *SrcRC = getMinClassForRegBank(
- SrcRegBank, RBI.getSizeInBits(SrcReg, MRI, TRI), true);
if (!SrcRC) {
LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
return false;
@@ -1724,12 +1756,16 @@ bool AArch64InstructionSelector::select(
const unsigned TReg = I.getOperand(2).getReg();
const unsigned FReg = I.getOperand(3).getReg();
+ // If we have a floating-point result, then we should use a floating point
+ // select instead of an integer select.
+ bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
+ AArch64::GPRRegBankID);
unsigned CSelOpc = 0;
if (Ty == LLT::scalar(32)) {
- CSelOpc = AArch64::CSELWr;
+ CSelOpc = IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
- CSelOpc = AArch64::CSELXr;
+ CSelOpc = IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
} else {
return false;
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp?rev=359940&r1=359939&r2=359940&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64RegisterBankInfo.cpp Fri May 3 15:37:46 2019
@@ -476,6 +476,24 @@ AArch64RegisterBankInfo::getInstrMapping
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ // Helper lambda that returns true if MI has floating point constraints.
+ auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
+ unsigned Op = MI.getOpcode();
+
+ // Do we have an explicit floating point instruction?
+ if (isPreISelGenericFloatingPointOpcode(Op))
+ return true;
+
+ // No. Check if we have a copy-like instruction. If we do, then we could
+ // still be fed by floating point instructions.
+ if (Op != TargetOpcode::COPY && !MI.isPHI())
+ return false;
+
+ // MI is copy-like. Return true if it's using an FPR.
+ return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
+ &AArch64::FPRRegBank;
+ };
+
switch (Opc) {
// G_{F|S|U}REM are not listed because they are not legal.
// Arithmetic ops.
@@ -657,30 +675,27 @@ AArch64RegisterBankInfo::getInstrMapping
break;
}
break;
+ case TargetOpcode::G_SELECT: {
+ // If the destination is FPR, preserve that.
+ if (OpRegBankIdx[0] != PMI_FirstGPR)
+ break;
+ LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ if (SrcTy.isVector() ||
+ any_of(MRI.use_instructions(MI.getOperand(0).getReg()),
+ [&](MachineInstr &MI) { return HasFPConstraints(MI); })) {
+ // Set the register bank of every operand to FPR.
+ for (unsigned Idx = 0, NumOperands = MI.getNumOperands();
+ Idx < NumOperands; ++Idx)
+ OpRegBankIdx[Idx] = PMI_FirstFPR;
+ }
+ break;
+ }
case TargetOpcode::G_UNMERGE_VALUES: {
// If the first operand belongs to a FPR register bank, then make sure that
// we preserve that.
if (OpRegBankIdx[0] != PMI_FirstGPR)
break;
- // Helper lambda that returns true if MI has floating point constraints.
- auto HasFPConstraints = [&TRI, &MRI, this](MachineInstr &MI) {
- unsigned Op = MI.getOpcode();
-
- // Do we have an explicit floating point instruction?
- if (isPreISelGenericFloatingPointOpcode(Op))
- return true;
-
- // No. Check if we have a copy-like instruction. If we do, then we could
- // still be fed by floating point instructions.
- if (Op != TargetOpcode::COPY && !MI.isPHI())
- return false;
-
- // MI is copy-like. Return true if it's using an FPR.
- return getRegBank(MI.getOperand(0).getReg(), MRI, TRI) ==
- &AArch64::FPRRegBank;
- };
-
LLT SrcTy = MRI.getType(MI.getOperand(MI.getNumOperands()-1).getReg());
// UNMERGE into scalars from a vector should always use FPR.
// Likewise if any of the uses are FP instructions.
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir?rev=359940&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/regbank-select.mir Fri May 3 15:37:46 2019
@@ -0,0 +1,60 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: select_f32
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $s0, $s1, $w0
+
+ ; CHECK-LABEL: name: select_f32
+ ; CHECK: liveins: $s0, $s1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr(s32) = COPY $s1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+ ; CHECK: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+ ; CHECK: $s0 = COPY [[SELECT]](s32)
+ ; CHECK: RET_ReallyLR implicit $s0
+ %3:_(s32) = COPY $w0
+ %0:_(s1) = G_TRUNC %3(s32)
+ %1:_(s32) = COPY $s0
+ %2:_(s32) = COPY $s1
+ %4:_(s32) = G_SELECT %0(s1), %1, %2
+ $s0 = COPY %4(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: select_f64
+alignment: 2
+legalized: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $d1, $w0
+
+ ; CHECK-LABEL: name: select_f64
+ ; CHECK: liveins: $d0, $d1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr(s64) = COPY $d1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr(s1) = COPY [[TRUNC]](s1)
+ ; CHECK: [[SELECT:%[0-9]+]]:fpr(s64) = G_SELECT [[COPY3]](s1), [[COPY1]], [[COPY2]]
+ ; CHECK: $d0 = COPY [[SELECT]](s64)
+ ; CHECK: RET_ReallyLR implicit $d0
+ %3:_(s32) = COPY $w0
+ %0:_(s1) = G_TRUNC %3(s32)
+ %1:_(s64) = COPY $d0
+ %2:_(s64) = COPY $d1
+ %4:_(s64) = G_SELECT %0(s1), %1, %2
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-select.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-select.mir?rev=359940&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-select.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-select.mir Fri May 3 15:37:46 2019
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: select_f32
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $s0, $s1, $w0
+
+ ; CHECK-LABEL: name: select_f32
+ ; CHECK: liveins: $s0, $s1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+ ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+ ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+ ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+ ; CHECK: $s0 = COPY [[FCSELSrrr]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ %3:gpr(s32) = COPY $w0
+ %0:gpr(s1) = G_TRUNC %3(s32)
+ %1:fpr(s32) = COPY $s0
+ %2:fpr(s32) = COPY $s1
+ %5:fpr(s1) = COPY %0(s1)
+ %4:fpr(s32) = G_SELECT %5(s1), %1, %2
+ $s0 = COPY %4(s32)
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: select_f64
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $d1, $w0
+
+ ; CHECK-LABEL: name: select_f64
+ ; CHECK: liveins: $d0, $d1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[COPY]]
+ ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY [[COPY3]]
+ ; CHECK: $wzr = ANDSWri [[COPY4]], 0, implicit-def $nzcv
+ ; CHECK: [[FCSELDrrr:%[0-9]+]]:fpr64 = FCSELDrrr [[COPY1]], [[COPY2]], 1, implicit $nzcv
+ ; CHECK: $d0 = COPY [[FCSELDrrr]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %3:gpr(s32) = COPY $w0
+ %0:gpr(s1) = G_TRUNC %3(s32)
+ %1:fpr(s64) = COPY $d0
+ %2:fpr(s64) = COPY $d1
+ %5:fpr(s1) = COPY %0(s1)
+ %4:fpr(s64) = G_SELECT %5(s1), %1, %2
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
Modified: llvm/trunk/test/CodeGen/AArch64/fast-isel-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fast-isel-select.ll?rev=359940&r1=359939&r2=359940&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fast-isel-select.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fast-isel-select.ll Fri May 3 15:37:46 2019
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
; First test the different supported value types for select.
define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -46,6 +47,9 @@ define float @select_f32(i1 zeroext %c,
; CHECK-LABEL: select_f32
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
+; GISEL-LABEL: select_f32
+; GISEL: {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT: fcsel {{s[0-9]+}}, s0, s1, ne
%1 = select i1 %c, float %a, float %b
ret float %1
}
@@ -54,6 +58,9 @@ define double @select_f64(i1 zeroext %c,
; CHECK-LABEL: select_f64
; CHECK: {{cmp w0, #0|tst w0, #0x1}}
; CHECK-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
+; GISEL-LABEL: select_f64
+; GISEL: {{cmp w0, #0|tst w0, #0x1}}
+; GISEL-NEXT: fcsel {{d[0-9]+}}, d0, d1, ne
%1 = select i1 %c, double %a, double %b
ret double %1
}
More information about the llvm-commits
mailing list