[llvm] 67a8775 - [AArch64] Don't generate gpr CSEL instructions in early-ifcvt if regclasses aren't compatible.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 21 16:51:43 PST 2020


Author: Amara Emerson
Date: 2020-01-21T16:51:31-08:00
New Revision: 67a87753225e7f5ad5b1fd151d6d2dde3d09ff09

URL: https://github.com/llvm/llvm-project/commit/67a87753225e7f5ad5b1fd151d6d2dde3d09ff09
DIFF: https://github.com/llvm/llvm-project/commit/67a87753225e7f5ad5b1fd151d6d2dde3d09ff09.diff

LOG: [AArch64] Don't generate gpr CSEL instructions in early-ifcvt if regclasses aren't compatible.

In GlobalISel we may in some unfortunate circumstances generate PHIs with
operands that are on separate banks. If-conversion doesn't currently check for
that case and ends up generating a CSEL on AArch64 with incorrect register
operands.

Differential Revision: https://reviews.llvm.org/D72961

Added: 
    llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir

Modified: 
    llvm/include/llvm/CodeGen/TargetInstrInfo.h
    llvm/lib/CodeGen/EarlyIfConversion.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.h
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
    llvm/lib/Target/PowerPC/PPCInstrInfo.h
    llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
    llvm/lib/Target/SystemZ/SystemZInstrInfo.h
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/lib/Target/X86/X86InstrInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 536fd2489092..4d1dfe1b0c65 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -838,15 +838,17 @@ class TargetInstrInfo : public MCInstrInfo {
   ///
   /// @param MBB         Block where select instruction would be inserted.
   /// @param Cond        Condition returned by analyzeBranch.
+  /// @param DstReg      Virtual dest register that the result should write to.
   /// @param TrueReg     Virtual register to select when Cond is true.
   /// @param FalseReg    Virtual register to select when Cond is false.
   /// @param CondCycles  Latency from Cond+Branch to select output.
   /// @param TrueCycles  Latency from TrueReg to select output.
   /// @param FalseCycles Latency from FalseReg to select output.
   virtual bool canInsertSelect(const MachineBasicBlock &MBB,
-                               ArrayRef<MachineOperand> Cond, unsigned TrueReg,
-                               unsigned FalseReg, int &CondCycles,
-                               int &TrueCycles, int &FalseCycles) const {
+                               ArrayRef<MachineOperand> Cond, unsigned DstReg,
+                               unsigned TrueReg, unsigned FalseReg,
+                               int &CondCycles, int &TrueCycles,
+                               int &FalseCycles) const {
     return false;
   }
 

diff  --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index fc31a9449e72..a67072ce340e 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -520,8 +520,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
     assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
 
     // Get target information.
-    if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
-                              PI.CondCycles, PI.TCycles, PI.FCycles)) {
+    if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),
+                              PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,
+                              PI.FCycles)) {
       LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
       return false;
     }

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0ed2a678c4f0..c07fb3a44b6f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -496,8 +496,9 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
 
 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                        ArrayRef<MachineOperand> Cond,
-                                       unsigned TrueReg, unsigned FalseReg,
-                                       int &CondCycles, int &TrueCycles,
+                                       unsigned DstReg, unsigned TrueReg,
+                                       unsigned FalseReg, int &CondCycles,
+                                       int &TrueCycles,
                                        int &FalseCycles) const {
   // Check register classes.
   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -506,6 +507,12 @@ bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
   if (!RC)
     return false;
 
+  // Also need to check the dest regclass, in case we're trying to optimize
+  // something like:
+  // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
+  if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
+    return false;
+
   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
   unsigned ExtraCondLat = Cond.size() != 1;
 

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 66e517e54903..f041624225e4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -191,7 +191,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
-                       unsigned, unsigned, int &, int &, int &) const override;
+                       unsigned, unsigned, unsigned, int &, int &,
+                       int &) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const DebugLoc &DL, unsigned DstReg,
                     ArrayRef<MachineOperand> Cond, unsigned TrueReg,

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index e1ee5b721d56..c2ac27553ae8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2128,8 +2128,8 @@ bool SIInstrInfo::reverseBranchCondition(
 
 bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                   ArrayRef<MachineOperand> Cond,
-                                  unsigned TrueReg, unsigned FalseReg,
-                                  int &CondCycles,
+                                  unsigned DstReg, unsigned TrueReg,
+                                  unsigned FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
   switch (Cond[0].getImm()) {
   case VCCNZ:

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b151a94b0d11..d988a00686c9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -293,9 +293,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     SmallVectorImpl<MachineOperand> &Cond) const override;
 
   bool canInsertSelect(const MachineBasicBlock &MBB,
-                       ArrayRef<MachineOperand> Cond,
-                       unsigned TrueReg, unsigned FalseReg,
-                       int &CondCycles,
+                       ArrayRef<MachineOperand> Cond, unsigned DstReg,
+                       unsigned TrueReg, unsigned FalseReg, int &CondCycles,
                        int &TrueCycles, int &FalseCycles) const override;
 
   void insertSelect(MachineBasicBlock &MBB,

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index c8adbbb8704e..6b448365215d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -753,9 +753,10 @@ unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
 
 // Select analysis.
 bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
-                ArrayRef<MachineOperand> Cond,
-                unsigned TrueReg, unsigned FalseReg,
-                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+                                   ArrayRef<MachineOperand> Cond,
+                                   unsigned DstReg, unsigned TrueReg,
+                                   unsigned FalseReg, int &CondCycles,
+                                   int &TrueCycles, int &FalseCycles) const {
   if (Cond.size() != 2)
     return false;
 

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 21da3457dc9e..a549b8221a75 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -273,7 +273,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
 
   // Select analysis.
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
-                       unsigned, unsigned, int &, int &, int &) const override;
+                       unsigned, unsigned, unsigned, int &, int &,
+                       int &) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const DebugLoc &DL, unsigned DstReg,
                     ArrayRef<MachineOperand> Cond, unsigned TrueReg,

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 97c8fa7aa32e..191792ad41e1 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -532,8 +532,9 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
 
 bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
                                        ArrayRef<MachineOperand> Pred,
-                                       unsigned TrueReg, unsigned FalseReg,
-                                       int &CondCycles, int &TrueCycles,
+                                       unsigned DstReg, unsigned TrueReg,
+                                       unsigned FalseReg, int &CondCycles,
+                                       int &TrueCycles,
                                        int &FalseCycles) const {
   // Not all subtargets have LOCR instructions.
   if (!STI.hasLoadStoreOnCond())

diff  --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 8391970c7d9d..c3daf9e9c628 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -221,8 +221,9 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
                         int *BytesAdded = nullptr) const override;
   bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
                       unsigned &SrcReg2, int &Mask, int &Value) const override;
-  bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
-                       unsigned, unsigned, int&, int&, int&) const override;
+  bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
+                       unsigned, unsigned, unsigned, int &, int &,
+                       int &) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const DebugLoc &DL, unsigned DstReg,
                     ArrayRef<MachineOperand> Cond, unsigned TrueReg,

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 245346d82731..6cefb15fe9cd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -2826,11 +2826,11 @@ unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
   return Count;
 }
 
-bool X86InstrInfo::
-canInsertSelect(const MachineBasicBlock &MBB,
-                ArrayRef<MachineOperand> Cond,
-                unsigned TrueReg, unsigned FalseReg,
-                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+bool X86InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+                                   ArrayRef<MachineOperand> Cond,
+                                   unsigned DstReg, unsigned TrueReg,
+                                   unsigned FalseReg, int &CondCycles,
+                                   int &TrueCycles, int &FalseCycles) const {
   // Not all subtargets have cmov instructions.
   if (!Subtarget.hasCMov())
     return false;

diff  --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 1d2da5305357..7c7124383f1f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -306,7 +306,8 @@ class X86InstrInfo final : public X86GenInstrInfo {
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
-                       unsigned, unsigned, int &, int &, int &) const override;
+                       unsigned, unsigned, unsigned, int &, int &,
+                       int &) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const DebugLoc &DL, unsigned DstReg,
                     ArrayRef<MachineOperand> Cond, unsigned TrueReg,

diff  --git a/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir
new file mode 100644
index 000000000000..436baae1a2b2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/early-ifcvt-regclass-mismatch.mir
@@ -0,0 +1,171 @@
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+  target triple = "arm64-apple-ios13.3.0"
+  define hidden void @phi_operands_regclasses_
diff erent() #0 {
+  entry:
+    br i1 undef, label %if.then139.i, label %if.else142.i
+
+  if.then139.i:                                     ; preds = %entry
+    %0 = load double, double* undef, align 8
+    br label %if.end161.i
+
+  if.else142.i:                                     ; preds = %entry
+    %tobool154.i = icmp eq i8 undef, 0
+    br i1 %tobool154.i, label %if.then155.i, label %if.end161.i
+
+  if.then155.i:                                     ; preds = %if.else142.i
+    %add158.i = fadd double undef, 0.000000e+00
+    br label %if.end161.i
+
+  if.end161.i:                                      ; preds = %if.then155.i, %if.else142.i, %if.then139.i
+    %y2.0.i = phi double [ %0, %if.then139.i ], [ 0.000000e+00, %if.else142.i ], [ 0.000000e+00, %if.then155.i ]
+    %add169.i = fadd double 0.000000e+00, %y2.0.i
+    %1 = fmul double undef, %add169.i
+    %add174.i = fsub double undef, %1
+    %2 = call double @llvm.fabs.f64(double %add174.i) #2
+    %cmp.i516.i = fcmp olt double %2, 0x3BC79CA10C924223
+    br i1 %cmp.i516.i, label %if.then.i.i, label %if.end9.i.i
+
+  if.then.i.i:                                      ; preds = %if.end161.i
+    %3 = call double @llvm.fabs.f64(double undef) #2
+    unreachable
+
+  if.end9.i.i:                                      ; preds = %if.end161.i
+    ret void
+  }
+  declare double @llvm.fabs.f64(double) #1
+  declare void @llvm.stackprotector(i8*, i8**) #2
+
+  attributes #0 = { "target-cpu"="apple-a7" }
+  attributes #1 = { nounwind readnone speculatable willreturn }
+  attributes #2 = { nounwind }
+
+  !llvm.ident = !{!0}
+
+  !0 = !{!"clang"}
+
+...
+---
+name:            phi_operands_regclasses_
diff erent
+alignment:       4
+exposesReturnsTwice: false
+legalized:       true
+regBankSelected: true
+selected:        true
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+registers:
+  - { id: 0, class: gpr32, preferred-register: '' }
+  - { id: 1, class: _, preferred-register: '' }
+  - { id: 2, class: _, preferred-register: '' }
+  - { id: 3, class: gpr, preferred-register: '' }
+  - { id: 4, class: gpr64, preferred-register: '' }
+  - { id: 5, class: fpr, preferred-register: '' }
+  - { id: 6, class: _, preferred-register: '' }
+  - { id: 7, class: gpr64, preferred-register: '' }
+  - { id: 8, class: gpr64common, preferred-register: '' }
+  - { id: 9, class: gpr64, preferred-register: '' }
+  - { id: 10, class: fpr64, preferred-register: '' }
+  - { id: 11, class: fpr64, preferred-register: '' }
+  - { id: 12, class: fpr, preferred-register: '' }
+  - { id: 13, class: fpr64, preferred-register: '' }
+  - { id: 14, class: fpr, preferred-register: '' }
+  - { id: 15, class: gpr32, preferred-register: '' }
+  - { id: 16, class: _, preferred-register: '' }
+  - { id: 17, class: gpr32, preferred-register: '' }
+  - { id: 18, class: gpr32, preferred-register: '' }
+  - { id: 19, class: gpr32, preferred-register: '' }
+  - { id: 20, class: gpr, preferred-register: '' }
+  - { id: 21, class: fpr64, preferred-register: '' }
+  - { id: 22, class: fpr64, preferred-register: '' }
+  - { id: 23, class: fpr64, preferred-register: '' }
+  - { id: 24, class: fpr64, preferred-register: '' }
+  - { id: 25, class: fpr64, preferred-register: '' }
+  - { id: 26, class: fpr64, preferred-register: '' }
+  - { id: 27, class: fpr64, preferred-register: '' }
+  - { id: 28, class: gpr64, preferred-register: '' }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+callSites:       []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; Here we check that we don't ifcvt to a CSEL that uses GPRs, because
+  ; some operands to the PHI have the fpr64 regclass.
+  ; CHECK-LABEL: name: phi_operands_regclasses_
diff erent
+  ; CHECK-NOT: CSELXr
+  bb.1.entry:
+    successors: %bb.2(0x40000000), %bb.3(0x40000000)
+
+    %0:gpr32 = IMPLICIT_DEF
+    %4:gpr64 = IMPLICIT_DEF
+    %8:gpr64common = IMPLICIT_DEF
+    TBNZW %0, 0, %bb.2
+    B %bb.3
+
+  bb.2.if.then139.i:
+    successors: %bb.5(0x80000000)
+
+    %7:gpr64 = LDRXui %8, 0 :: (load 8 from `double* undef`)
+    B %bb.5
+
+  bb.3.if.else142.i:
+    successors: %bb.4(0x40000000), %bb.5(0x40000000)
+
+    %26:fpr64 = FMOVD0
+    %19:gpr32 = COPY $wzr
+    CBNZW %19, %bb.5
+
+  bb.4.if.then155.i:
+    successors: %bb.5(0x80000000)
+
+    %27:fpr64 = FMOVD0
+
+  bb.5.if.end161.i:
+    successors: %bb.6(0x40000000), %bb.7(0x40000000)
+
+    %9:gpr64 = PHI %7, %bb.2, %26, %bb.3, %27, %bb.4
+    %21:fpr64 = COPY %9
+    %25:fpr64 = FMOVD0
+    %10:fpr64 = FADDDrr %25, %21
+    %22:fpr64 = COPY %4
+    %11:fpr64 = FMULDrr %22, %10
+    %23:fpr64 = COPY %4
+    %13:fpr64 = FABD64 %23, %11
+    %28:gpr64 = MOVi64imm 4307583784117748259
+    %24:fpr64 = COPY %28
+    FCMPDrr %13, %24, implicit-def $nzcv
+    %17:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv
+    TBNZW %17, 0, %bb.6
+    B %bb.7
+
+  bb.6.if.then.i.i:
+    successors:
+
+
+  bb.7.if.end9.i.i:
+    RET_ReallyLR
+
+...


        


More information about the llvm-commits mailing list