[PATCH] [ARMv8] CodeGen for VSEL

Thu Aug 22 05:25:10 PDT 2013

Big cleanup, thanks to Tim's CMOV work.

http://llvm-reviews.chandlerc.com/D1445

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D1445?vs=3592&id=3664#toc

Files:
  lib/Target/ARM/ARMISelLowering.cpp
  lib/Target/ARM/ARMInstrVFP.td

Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================

--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -3178,6 +3178,61 @@
                          SelectTrue, SelectFalse, ISD::SETNE);
 }
 
+static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+  if (CC == ISD::SETNE)
+    return ISD::SETEQ;
+  return ISD::getSetCCSwappedOperands(CC);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+                                 bool &swpCmpOps, bool &swpVselOps) {
+  // Start by selecting the GE condition code for opcodes that return true for
+  // 'equality'
+  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+      CC == ISD::SETULE)
+    CondCode = ARMCC::GE;
+
+  // and GT for opcodes that return false for 'equality'.
+  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+           CC == ISD::SETULT)
+    CondCode = ARMCC::GT;
+
+  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+  // to swap the compare operands.
+  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+      CC == ISD::SETULT)
+    swpCmpOps = true;
+
+  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+  // If we have an unordered opcode, we need to swap the operands to the VSEL
+  // instruction (effectively negating the condition).
+  //
+  // This also has the effect of swapping which one of 'less' or 'greater'
+  // returns true, so we also swap the compare operands. It also switches
+  // whether we return true for 'equality', so we compensate by picking the
+  // opposite condition code to our original choice.
+  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+      CC == ISD::SETUGT) {
+    swpCmpOps = !swpCmpOps;
+    swpVselOps = !swpVselOps;
+    CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+  }
+
+  // 'ordered' is 'anything but unordered', so use the VS condition code and
+  // swap the VSEL operands.
+  if (CC == ISD::SETO) {
+    CondCode = ARMCC::VS;
+    swpVselOps = true;
+  }
+
+  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+  // code and swap the VSEL operands.
+  if (CC == ISD::SETUNE) {
+    CondCode = ARMCC::EQ;
+    swpVselOps = true;
+  }
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDValue LHS = Op.getOperand(0);
@@ -3188,15 +3243,52 @@
   SDLoc dl(Op);
 
   if (LHS.getValueType() == MVT::i32) {
+    // Try to generate VSEL on ARMv8.
+    // The VSEL instruction can't use all the usual ARM condition
+    // codes: it only has two bits to select the condition code, so it's
+    // constrained to use only GE, GT, VS and EQ.
+    //
+    // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+    // swap the operands of the previous compare instruction (effectively
+    // inverting the compare condition, swapping 'less' and 'greater') and
+    // sometimes need to swap the operands to the VSEL (which inverts the
+    // condition in the sense of firing whenever the previous condition didn't)
+    if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
+                                      TrueVal.getValueType() == MVT::f64)) {
+      ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+      if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+          CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+        CC = getInverseCCForVSEL(CC);
+        std::swap(TrueVal, FalseVal);
+      }
+    }
+
     SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+                       Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
 
+  // Try to generate VSEL on ARMv8.
+  if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
+                                    TrueVal.getValueType() == MVT::f64)) {
+    bool swpCmpOps = false;
+    bool swpVselOps = false;
+    checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+      if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+        CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+      if (swpCmpOps)
+        std::swap(LHS, RHS);
+      if (swpVselOps)
+        std::swap(TrueVal, FalseVal);
+    }
+  }
+
   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -333,8 +333,8 @@
   let D = VFPNeonA8Domain;
 }
 
-multiclass vsel_inst<string op, bits<2> opc> {
-  let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+multiclass vsel_inst<string op, bits<2> opc, int CC> {
+  let DecoderNamespace = "VFPV8", PostEncoderMethod = "", Uses = [CPSR] in {
     def S : ASbInp<0b11100, opc, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
@@ -345,12 +345,20 @@
                    NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
                    []>, Requires<[HasV8FP]>;
   }
+
+  let AddedComplexity = 4 in {
+    def : Pat<(ARMcmov SPR:$Sn, SPR:$Sm, CC),
+              (!cast<Instruction>(NAME#"S") SPR:$Sm, SPR:$Sn)>;
+    def : Pat<(ARMcmov (f64 DPR:$Dn), (f64 DPR:$Dm), CC),
+              (!cast<Instruction>(NAME#"D") DPR:$Dm, DPR:$Dn)>;
+  }
 }
 
-defm VSELGT : vsel_inst<"gt", 0b11>;
-defm VSELGE : vsel_inst<"ge", 0b10>;
-defm VSELEQ : vsel_inst<"eq", 0b00>;
-defm VSELVS : vsel_inst<"vs", 0b01>;
+// The CC constants here match ARMCC::CondCodes.
+defm VSELGT : vsel_inst<"gt", 0b11, 12>;
+defm VSELGE : vsel_inst<"ge", 0b10, 10>;
+defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
+defm VSELVS : vsel_inst<"vs", 0b01, 6>;
 
 multiclass vmaxmin_inst<string op, bit opc> {
   let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1445.2.patch
Type: text/x-patch
Size: 6355 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130822/6d9a92f8/attachment.bin>