[llvm] r361888 - [AArch64][GlobalISel] Select FCMPSri/FCMPDri when comparing against 0.0

Tue May 28 15:52:49 PDT 2019

Author: paquette
Date: Tue May 28 15:52:49 2019
New Revision: 361888

URL: http://llvm.org/viewvc/llvm-project?rev=361888&view=rev
Log:
[AArch64][GlobalISel] Select FCMPSri/FCMPDri when comparing against 0.0

Add support for selecting FCMPSri and FCMPDri when comparing against 0.0, and
factor out opcode selection for G_FCMP into its own function.

Add a test to show that we don't do this with other immediates.

Differential Revision: https://reviews.llvm.org/D62539

Added:
    llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
Modified:
    llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp?rev=361888&r1=361887&r2=361888&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp Tue May 28 15:52:49 2019
@@ -741,6 +741,20 @@ static unsigned selectFPConvOpc(unsigned
   return GenericOpc;
 }
 
+/// Helper function to select the opcode for a G_FCMP.
+static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
+  // If this is a compare against +0.0, then we don't have to explicitly
+  // materialize a constant.
+  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
+  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
+  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
+  if (OpSize != 32 && OpSize != 64)
+    return 0;
+  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
+                              {AArch64::FCMPSri, AArch64::FCMPDri}};
+  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
+}
+
 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
   switch (P) {
   default:
@@ -1845,15 +1859,9 @@ bool AArch64InstructionSelector::select(
       return false;
     }
 
-    unsigned CmpOpc = 0;
-    LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
-    if (CmpTy == LLT::scalar(32)) {
-      CmpOpc = AArch64::FCMPSrr;
-    } else if (CmpTy == LLT::scalar(64)) {
-      CmpOpc = AArch64::FCMPDrr;
-    } else {
+    unsigned CmpOpc = selectFCMPOpc(I, MRI);
+    if (!CmpOpc)
       return false;
-    }
 
     // FIXME: regbank
 
@@ -1861,9 +1869,16 @@ bool AArch64InstructionSelector::select(
     changeFCMPPredToAArch64CC(
         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
 
-    MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
-                               .addUse(I.getOperand(2).getReg())
-                               .addUse(I.getOperand(3).getReg());
+    // Partially build the compare. Decide if we need to add a use for the
+    // third operand based off whether or not we're comparing against 0.0.
+    auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+                     .addUse(I.getOperand(2).getReg());
+
+    // If we don't have an immediate compare, then we need to add a use of the
+    // register which wasn't used for the immediate.
+    // Note that the immediate will always be the last operand.
+    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
+      CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
 
     const unsigned DefReg = I.getOperand(0).getReg();
     unsigned Def1Reg = DefReg;
@@ -1893,8 +1908,7 @@ bool AArch64InstructionSelector::select(
       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
     }
-
-    constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
 
     I.eraseFromParent();

Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir?rev=361888&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/select-fcmp.mir Tue May 28 15:52:49 2019
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+# Verify that we get FCMPSri when we compare against 0.0 and that we get
+# FCMPSrr otherwise.
+
+...
+---
+name:            zero
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: zero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: FCMPSri [[COPY]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 0.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name:            notzero
+alignment:       2
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.1:
+    liveins: $s0, $s1
+
+    ; CHECK-LABEL: name: notzero
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK: [[FMOVSi:%[0-9]+]]:fpr32 = FMOVSi 112
+    ; CHECK: FCMPSrr [[COPY]], [[FMOVSi]], implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; CHECK: $s0 = COPY [[CSINCWr]]
+    ; CHECK: RET_ReallyLR implicit $s0
+    %0:fpr(s32) = COPY $s0
+    %1:fpr(s32) = COPY $s1
+    %2:fpr(s32) = G_FCONSTANT float 1.000000e+00
+    %3:gpr(s32) = G_FCMP floatpred(oeq), %0(s32), %2
+    $s0 = COPY %3(s32)
+    RET_ReallyLR implicit $s0