[llvm] 67a8775 - [AArch64] Don't generate gpr CSEL instructions in early-ifcvt if regclasses aren't compatible.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 21 16:51:43 PST 2020
Author: Amara Emerson
Date: 2020-01-21T16:51:31-08:00
New Revision: 67a87753225e7f5ad5b1fd151d6d2dde3d09ff09
URL: https://github.com/llvm/llvm-project/commit/67a87753225e7f5ad5b1fd151d6d2dde3d09ff09
DIFF: https://github.com/llvm/llvm-project/commit/67a87753225e7f5ad5b1fd151d6d2dde3d09ff09.diff
LOG: [AArch64] Don't generate gpr CSEL instructions in early-ifcvt if regclasses aren't compatible.
In GlobalISel we may in some unfortunate circumstances generate PHIs with
operands that are on separate banks. If-conversion doesn't currently check for
that case and ends up generating a CSEL on AArch64 with incorrect register
operands.
Differential Revision: https://reviews.llvm.org/D72961
Added:
llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir
Modified:
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/EarlyIfConversion.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h
llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
llvm/lib/Target/SystemZ/SystemZInstrInfo.h
llvm/lib/Target/X86/X86InstrInfo.cpp
llvm/lib/Target/X86/X86InstrInfo.h
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 536fd2489092..4d1dfe1b0c65 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -838,15 +838,17 @@ class TargetInstrInfo : public MCInstrInfo {
///
/// @param MBB Block where select instruction would be inserted.
/// @param Cond Condition returned by analyzeBranch.
+ /// @param DstReg Virtual dest register that the result should write to.
/// @param TrueReg Virtual register to select when Cond is true.
/// @param FalseReg Virtual register to select when Cond is false.
/// @param CondCycles Latency from Cond+Branch to select output.
/// @param TrueCycles Latency from TrueReg to select output.
/// @param FalseCycles Latency from FalseReg to select output.
virtual bool canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond, unsigned TrueReg,
- unsigned FalseReg, int &CondCycles,
- int &TrueCycles, int &FalseCycles) const {
+ ArrayRef<MachineOperand> Cond, unsigned DstReg,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles,
+ int &FalseCycles) const {
return false;
}
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index fc31a9449e72..a67072ce340e 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -520,8 +520,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
// Get target information.
- if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
- PI.CondCycles, PI.TCycles, PI.FCycles)) {
+ if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),
+ PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,
+ PI.FCycles)) {
LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0ed2a678c4f0..c07fb3a44b6f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -496,8 +496,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles,
+ unsigned DstReg, unsigned TrueReg,
+ unsigned FalseReg, int &CondCycles,
+ int &TrueCycles,
int &FalseCycles) const {
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -506,6 +507,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (!RC)
return false;
+ // Also need to check the dest regclass, in case we're trying to optimize
+ // something like:
+ // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
+ if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
+ return false;
+
// Expanding cbz/tbz requires an extra cycle of latency on the condition.
unsigned ExtraCondLat = Cond.size() != 1;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 66e517e54903..f041624225e4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -191,7 +191,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int &, int &, int &) const override;
+ unsigned, unsigned, unsigned, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e1ee5b721d56..c2ac27553ae8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2128,8 +2128,8 @@ bool SIInstrInfo::reverseBranchCondition(
bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles,
+ unsigned DstReg, unsigned TrueReg,
+ unsigned FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const {
switch (Cond[0].getImm()) {
case VCCNZ:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b151a94b0d11..d988a00686c9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -293,9 +293,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
SmallVectorImpl<MachineOperand> &Cond) const override;
bool canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles,
+ ArrayRef<MachineOperand> Cond, unsigned DstReg,
+ unsigned TrueReg, unsigned FalseReg, int &CondCycles,
int &TrueCycles, int &FalseCycles) const override;
void insertSelect(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index c8adbbb8704e..6b448365215d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -753,9 +753,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
// Select analysis.
bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ ArrayRef<MachineOperand> Cond,
+ unsigned DstReg, unsigned TrueReg,
+ unsigned FalseReg, int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
if (Cond.size() != 2)
return false;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 21da3457dc9e..a549b8221a75 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -273,7 +273,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
// Select analysis.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int &, int &, int &) const override;
+ unsigned, unsigned, unsigned, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 97c8fa7aa32e..191792ad41e1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles,
+ unsigned DstReg, unsigned TrueReg,
+ unsigned FalseReg, int &CondCycles,
+ int &TrueCycles,
int &FalseCycles) const {
// Not all subtargets have LOCR instructions.
if (!STI.hasLoadStoreOnCond())
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 8391970c7d9d..c3daf9e9c628 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -221,8 +221,9 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
int *BytesAdded = nullptr) const override;
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask, int &Value) const override;
- bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int&, int&, int&) const override;
+ bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+ unsigned, unsigned, unsigned, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 245346d82731..6cefb15fe9cd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2826,11 +2826,11 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
return Count;
}
-bool X86InstrInfo::
-canInsertSelect(const MachineBasicBlock &MBB,
- ArrayRef<MachineOperand> Cond,
- unsigned TrueReg, unsigned FalseReg,
- int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Cond,
+ unsigned DstReg, unsigned TrueReg,
+ unsigned FalseReg, int &CondCycles,
+ int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
if (!Subtarget.hasCMov())
return false;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 1d2da5305357..7c7124383f1f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -306,7 +306,8 @@ class X86InstrInfo final : public X86GenInstrInfo {
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
- unsigned, unsigned, int &, int &, int &) const override;
+ unsigned, unsigned, unsigned, int &, int &,
+ int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
diff --git a/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir
new file mode 100644
index 000000000000..436baae1a2b2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir
@@ -0,0 +1,171 @@
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+ target triple = "arm64-apple-ios13.3.0"
+ define hidden void @phi_operands_regclasses_
diff erent() #0 {
+ entry:
+ br i1 undef, label %if.then139.i, label %if.else142.i
+
+ if.then139.i: ; preds = %entry
+ %0 = load double, double* undef, align 8
+ br label %if.end161.i
+
+ if.else142.i: ; preds = %entry
+ %tobool154.i = icmp eq i8 undef, 0
+ br i1 %tobool154.i, label %if.then155.i, label %if.end161.i
+
+ if.then155.i: ; preds = %if.else142.i
+ %add158.i = fadd double undef, 0.000000e+00
+ br label %if.end161.i
+
+ if.end161.i: ; preds = %if.then155.i, %if.else142.i, %if.then139.i
+ %y2.0.i = phi double [ %0, %if.then139.i ], [ 0.000000e+00, %if.else142.i ], [ 0.000000e+00, %if.then155.i ]
+ %add169.i = fadd double 0.000000e+00, %y2.0.i
+ %1 = fmul double undef, %add169.i
+ %add174.i = fsub double undef, %1
+ %2 = call double @llvm.fabs.f64(double %add174.i) #2
+ %cmp.i516.i = fcmp olt double %2, 0x3BC79CA10C924223
+ br i1 %cmp.i516.i, label %if.then.i.i, label %if.end9.i.i
+
+ if.then.i.i: ; preds = %if.end161.i
+ %3 = call double @llvm.fabs.f64(double undef) #2
+ unreachable
+
+ if.end9.i.i: ; preds = %if.end161.i
+ ret void
+ }
+ declare double @llvm.fabs.f64(double) #1
+ declare void @llvm.stackprotector(i8*, i8**) #2
+
+ attributes #0 = { "target-cpu"="apple-a7" }
+ attributes #1 = { nounwind readnone speculatable willreturn }
+ attributes #2 = { nounwind }
+
+ !llvm.ident = !{!0}
+
+ !0 = !{!"clang"}
+
+...
+---
+name: phi_operands_regclasses_
diff erent
+alignment: 4
+exposesReturnsTwice: false
+legalized: true
+regBankSelected: true
+selected: true
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+ - { id: 0, class: gpr32, preferred-register: '' }
+ - { id: 1, class: _, preferred-register: '' }
+ - { id: 2, class: _, preferred-register: '' }
+ - { id: 3, class: gpr, preferred-register: '' }
+ - { id: 4, class: gpr64, preferred-register: '' }
+ - { id: 5, class: fpr, preferred-register: '' }
+ - { id: 6, class: _, preferred-register: '' }
+ - { id: 7, class: gpr64, preferred-register: '' }
+ - { id: 8, class: gpr64common, preferred-register: '' }
+ - { id: 9, class: gpr64, preferred-register: '' }
+ - { id: 10, class: fpr64, preferred-register: '' }
+ - { id: 11, class: fpr64, preferred-register: '' }
+ - { id: 12, class: fpr, preferred-register: '' }
+ - { id: 13, class: fpr64, preferred-register: '' }
+ - { id: 14, class: fpr, preferred-register: '' }
+ - { id: 15, class: gpr32, preferred-register: '' }
+ - { id: 16, class: _, preferred-register: '' }
+ - { id: 17, class: gpr32, preferred-register: '' }
+ - { id: 18, class: gpr32, preferred-register: '' }
+ - { id: 19, class: gpr32, preferred-register: '' }
+ - { id: 20, class: gpr, preferred-register: '' }
+ - { id: 21, class: fpr64, preferred-register: '' }
+ - { id: 22, class: fpr64, preferred-register: '' }
+ - { id: 23, class: fpr64, preferred-register: '' }
+ - { id: 24, class: fpr64, preferred-register: '' }
+ - { id: 25, class: fpr64, preferred-register: '' }
+ - { id: 26, class: fpr64, preferred-register: '' }
+ - { id: 27, class: fpr64, preferred-register: '' }
+ - { id: 28, class: gpr64, preferred-register: '' }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+callSites: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; Here we check that we don't ifcvt to a CSEL that uses GPRs, because
+ ; some operands to the PHI have the fpr64 regclass.
+ ; CHECK-LABEL: name: phi_operands_regclasses_
diff erent
+ ; CHECK-NOT: CSELXr
+ bb.1.entry:
+ successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+ %0:gpr32 = IMPLICIT_DEF
+ %4:gpr64 = IMPLICIT_DEF
+ %8:gpr64common = IMPLICIT_DEF
+ TBNZW %0, 0, %bb.2
+ B %bb.3
+
+ bb.2.if.then139.i:
+ successors: %bb.5(0x80000000)
+
+ %7:gpr64 = LDRXui %8, 0 :: (load 8 from `double* undef`)
+ B %bb.5
+
+ bb.3.if.else142.i:
+ successors: %bb.4(0x40000000), %bb.5(0x40000000)
+
+ %26:fpr64 = FMOVD0
+ %19:gpr32 = COPY $wzr
+ CBNZW %19, %bb.5
+
+ bb.4.if.then155.i:
+ successors: %bb.5(0x80000000)
+
+ %27:fpr64 = FMOVD0
+
+ bb.5.if.end161.i:
+ successors: %bb.6(0x40000000), %bb.7(0x40000000)
+
+ %9:gpr64 = PHI %7, %bb.2, %26, %bb.3, %27, %bb.4
+ %21:fpr64 = COPY %9
+ %25:fpr64 = FMOVD0
+ %10:fpr64 = FADDDrr %25, %21
+ %22:fpr64 = COPY %4
+ %11:fpr64 = FMULDrr %22, %10
+ %23:fpr64 = COPY %4
+ %13:fpr64 = FABD64 %23, %11
+ %28:gpr64 = MOVi64imm 4307583784117748259
+ %24:fpr64 = COPY %28
+ FCMPDrr %13, %24, implicit-def $nzcv
+ %17:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+ TBNZW %17, 0, %bb.6
+ B %bb.7
+
+ bb.6.if.then.i.i:
+ successors:
+
+
+ bb.7.if.end9.i.i:
+ RET_ReallyLR
+
+...
More information about the llvm-commits
mailing list