[llvm] r309923 - [ARM] Use ADDCARRY / SUBCARRY

Thu Aug 3 00:45:10 PDT 2017

Author: rogfer01
Date: Thu Aug  3 00:45:10 2017
New Revision: 309923

URL: http://llvm.org/viewvc/llvm-project?rev=309923&view=rev
Log:
[ARM] Use ADDCARRY / SUBCARRY

This patch:

- marks nodes ISD::ADDCARRY and ISD::SUBCARRY as custom-lowered for i32
- lowering is done by first converting the boolean value into the carry flag
  using (_, C) <- (ARMISD::ADDC R, -1) and then converting the carry flag back
  to an integer value using (R, _) <- (ARMISD::ADDE 0, 0, C). An ARMISD::ADDE
  between the two operations does the actual addition.
- for subtraction, given that the second result of ISD::SUBCARRY is actually a
  borrow, we need to invert the incoming borrow operand before using
  ARMISD::SUBE, and invert the carry result of ARMISD::SUBE afterwards, to
  preserve the semantics.
- given that the generic combiner may lower ISD::ADDCARRY and
  ISD::SUBCARRY into ISD::UADDO and ISD::USUBO, we need to update their
  lowering as well; otherwise i64 operations would now require branches. This
  implies updating the corresponding test for the unsigned operations.
- add a new combine to remove the redundant conversions from/to carry flags
  to/from boolean values: (ARMISD::ADDC (ARMISD::ADDE 0, 0, C), -1) -> C

Differential Revision: https://reviews.llvm.org/D35192


Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=309923&r1=309922&r2=309923&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Aug  3 00:45:10 2017
@@ -801,6 +801,9 @@ ARMTargetLowering::ARMTargetLowering(con
   setOperationAction(ISD::SSUBO, MVT::i32, Custom);
   setOperationAction(ISD::USUBO, MVT::i32, Custom);
 
+  setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
+  setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
+
   // i64 operation support.
   setOperationAction(ISD::MUL,     MVT::i64, Expand);
   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
@@ -3952,7 +3955,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue
 }
 
 SDValue
-ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
   // Let legalize expand this if it isn't a legal type yet.
   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
     return SDValue();
@@ -3974,6 +3977,66 @@ ARMTargetLowering::LowerXALUO(SDValue Op
   return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
 }
 
+static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
+                                              SelectionDAG &DAG) {
+  // Materialise the carry flag from an integer boolean by emitting
+  // ARMISD::ADDC BoolCarry, ~0; callers read the flag from result #1.
+  EVT BoolVT = BoolCarry.getValueType();
+  SDLoc Loc(BoolCarry);
+  SDVTList ResultTys = DAG.getVTList(BoolVT, MVT::i32);
+  SDValue AllOnes = DAG.getConstant(
+      APInt::getAllOnesValue(BoolVT.getScalarSizeInBits()), Loc, BoolVT);
+  return DAG.getNode(ARMISD::ADDC, Loc, ResultTys, BoolCarry, AllOnes);
+}
+
+static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
+                                              SelectionDAG &DAG) {
+  // Turn the carry flag back into an integer boolean by emitting
+  // ARMISD::ADDE 0, 0, Flags; the integer is result #0 of the returned node.
+  SDLoc Loc(Flags);
+  SDValue Zero = DAG.getConstant(0, Loc, MVT::i32);
+  SDVTList ResultTys = DAG.getVTList(VT, MVT::i32);
+
+  return DAG.getNode(ARMISD::ADDE, Loc, ResultTys, Zero, Zero, Flags);
+}
+
+// Lower ISD::UADDO / ISD::USUBO to ARMISD::ADDC / ARMISD::SUBC, returning the
+// arithmetic result merged with the overflow bit as an integer value.
+SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  // Let legalize expand this if it isn't a legal type yet.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+    return SDValue();
+
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+  SDValue Value;
+  SDValue Overflow;
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("Unknown overflow instruction!");
+  case ISD::UADDO:
+    Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
+    // Convert the carry flag into a boolean value.
+    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
+    break;
+  case ISD::USUBO:
+    Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
+    // Convert the carry flag into a boolean value.
+    Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
+    // ARMISD::SUBC clears the carry on borrow, so overflow is 1 - C. ISD::SUB
+    // has a single result, so give it a plain i32 type, not the VTs pair.
+    Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
+                           DAG.getConstant(1, dl, MVT::i32), Overflow);
+    break;
+  }
+
+  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
+}
+
 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue Cond = Op.getOperand(0);
   SDValue SelectTrue = Op.getOperand(1);
@@ -7378,6 +7441,53 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(
                      Op.getOperand(1), Op.getOperand(2));
 }
 
+// Custom lowering of ISD::ADDCARRY / ISD::SUBCARRY to ARMISD::ADDE / SUBE.
+// Operand 2 and the second result are integer booleans, so they are converted
+// to and from the carry flag around the actual operation.
+static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
+  SDNode *N = Op.getNode();
+  EVT VT = N->getValueType(0);
+  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+  SDValue Carry = Op.getOperand(2);
+  SDLoc DL(Op);
+
+  // Result holds the ARMISD node; Carry ends up as the boolean carry/borrow.
+  SDValue Result;
+  if (Op.getOpcode() == ISD::ADDCARRY) {
+    // This converts the boolean value carry into the carry flag.
+    Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
+
+    // Do the addition proper using the carry flag we wanted.
+    Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
+                         Op.getOperand(1), Carry.getValue(1));
+
+    // Now convert the carry flag into a boolean value.
+    Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
+  } else {
+    // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
+    // have to invert the carry first. ISD::SUB has a single i32 result.
+    Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
+                        DAG.getConstant(1, DL, MVT::i32), Carry);
+    // This converts the boolean value carry into the carry flag.
+    Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
+
+    // Do the subtraction proper using the carry flag we wanted.
+    Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
+                         Op.getOperand(1), Carry.getValue(1));
+
+    // Now convert the carry flag into a boolean value.
+    Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
+    // But the carry returned by ARMISD::SUBE is not a borrow as expected
+    // by ISD::SUBCARRY, so compute 1 - C.
+    Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
+                        DAG.getConstant(1, DL, MVT::i32), Carry);
+  }
+
+  // Return both values.
+  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
+}
+
 SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
   assert(Subtarget->isTargetDarwin());
 
@@ -7732,11 +7842,14 @@ SDValue ARMTargetLowering::LowerOperatio
   case ISD::ADDE:
   case ISD::SUBC:
   case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+  case ISD::ADDCARRY:
+  case ISD::SUBCARRY:      return LowerADDSUBCARRY(Op, DAG);
   case ISD::SADDO:
-  case ISD::UADDO:
   case ISD::SSUBO:
+    return LowerSignedALUO(Op, DAG);
+  case ISD::UADDO:
   case ISD::USUBO:
-    return LowerXALUO(Op, DAG);
+    return LowerUnsignedALUO(Op, DAG);
   case ISD::ATOMIC_LOAD:
   case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
   case ISD::FSINCOS:       return LowerFSINCOS(Op, DAG);
@@ -9871,8 +9984,22 @@ static SDValue PerformUMLALCombine(SDNod
     return SDValue();
 }
 
-static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
+static SDValue PerformAddcSubcCombine(SDNode *N,
+                                      TargetLowering::DAGCombinerInfo &DCI,
                                       const ARMSubtarget *Subtarget) {
+  SelectionDAG &DAG(DCI.DAG);
+
+  if (N->getOpcode() == ARMISD::ADDC) {
+    // (ADDC (ADDE 0, 0, C), -1) -> C
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    if (LHS->getOpcode() == ARMISD::ADDE &&
+        isNullConstant(LHS->getOperand(0)) &&
+        isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
+      return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
+    }
+  }
+
   if (Subtarget->isThumb1Only()) {
     SDValue RHS = N->getOperand(1);
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
@@ -11761,6 +11888,14 @@ static SDValue PerformExtendCombine(SDNo
   return SDValue();
 }
 
+static const APInt *isPowerOf2Constant(SDValue V) {
+  // Yields V's value if V is a constant power of two, nullptr otherwise.
+  if (auto *Const = dyn_cast<ConstantSDNode>(V))
+    if (Const->getAPIntValue().isPowerOf2())
+      return &Const->getAPIntValue();
+  return nullptr;
+}
+
 SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
   // If we have a CMOV, OR and AND combination such as:
   //   if (x & CN)
@@ -11789,8 +11924,8 @@ SDValue ARMTargetLowering::PerformCMOVTo
   SDValue And = CmpZ->getOperand(0);
   if (And->getOpcode() != ISD::AND)
     return SDValue();
-  ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
-  if (!AndC || !AndC->getAPIntValue().isPowerOf2())
+  const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
+  if (!AndC)
     return SDValue();
   SDValue X = And->getOperand(0);
 
@@ -11830,7 +11965,7 @@ SDValue ARMTargetLowering::PerformCMOVTo
   SDValue V = Y;
   SDLoc dl(X);
   EVT VT = X.getValueType();
-  unsigned BitInX = AndC->getAPIntValue().logBase2();
+  unsigned BitInX = AndC->logBase2();
 
   if (BitInX != 0) {
     // We must shift X first.
@@ -11991,7 +12126,7 @@ SDValue ARMTargetLowering::PerformDAGCom
   case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
   case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
   case ARMISD::ADDC:
-  case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::SUBC:    return PerformAddcSubcCombine(N, DCI, Subtarget);
   case ARMISD::SUBE:    return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
   case ARMISD::BFI:     return PerformBFICombine(N, DCI);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
@@ -12665,10 +12800,17 @@ void ARMTargetLowering::computeKnownBits
   case ARMISD::ADDE:
   case ARMISD::SUBC:
   case ARMISD::SUBE:
-    // These nodes' second result is a boolean
-    if (Op.getResNo() == 0)
-      break;
-    Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    // Special cases when we convert a carry to a boolean.
+    if (Op.getResNo() == 0) {
+      SDValue LHS = Op.getOperand(0);
+      SDValue RHS = Op.getOperand(1);
+      // (ADDE 0, 0, C) will give us a single bit.
+      if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
+          isNullConstant(RHS)) {
+        Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+        return;
+      }
+    }
     break;
   case ARMISD::CMOV: {
     // Bits are known zero/one if known on the LHS and RHS.

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=309923&r1=309922&r2=309923&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu Aug  3 00:45:10 2017
@@ -618,7 +618,8 @@ class InstrItineraryData;
     SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
-    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;

Modified: llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll?rev=309923&r1=309922&r2=309923&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll Thu Aug  3 00:45:10 2017
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=arm-linux -mcpu=generic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6
+; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7
 
 define i32 @uadd_overflow(i32 %a, i32 %b) #0 {
   %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
@@ -7,10 +9,19 @@ define i32 @uadd_overflow(i32 %a, i32 %b
   ret i32 %2
 
   ; CHECK-LABEL: uadd_overflow:
-  ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
-  ; CHECK: mov r[[R1]], #1
-  ; CHECK: cmp r[[R2]], r[[R0]]
-  ; CHECK: movhs r[[R1]], #0
+
+  ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; ARM: mov r[[R2:[0-9]+]], #0
+  ; ARM: adc r[[R0]], r[[R2]], #0
+
+  ; THUMBV6: movs    r[[R2:[0-9]+]], #0
+  ; THUMBV6: adds    r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV6: adcs    r[[R2]], r[[R2]]
+  ; THUMBV6: mov     r[[R0]], r[[R2]]
+
+  ; THUMBV7: adds  r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
+  ; THUMBV7: adc   r[[R0]], r[[R2]], #0
 }
 
 
@@ -21,10 +32,26 @@ define i32 @sadd_overflow(i32 %a, i32 %b
   ret i32 %2
 
   ; CHECK-LABEL: sadd_overflow:
-  ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
-  ; CHECK: mov r[[R1]], #1
-  ; CHECK: cmp r[[R2]], r[[R0]]
-  ; CHECK: movvc r[[R1]], #0
+
+  ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+  ; ARM: mov r[[R1]], #1
+  ; ARM: cmp r[[R2]], r[[R0]]
+  ; ARM: movvc r[[R1]], #0
+
+  ; THUMBV6: mov  r[[R2:[0-9]+]], r[[R0:[0-9]+]]
+  ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
+  ; THUMBV6: movs r[[R0]], #0
+  ; THUMBV6: movs r[[R1]], #1
+  ; THUMBV6: cmp  r[[R3]], r[[R2]]
+  ; THUMBV6: bvc  .L[[LABEL:.*]]
+  ; THUMBV6: mov  r[[R0]], r[[R1]]
+  ; THUMBV6: .L[[LABEL]]:
+
+  ; THUMBV7: movs  r[[R1]], #1
+  ; THUMBV7: cmp   r[[R2]], r[[R0]]
+  ; THUMBV7: it    vc
+  ; THUMBV7: movvc r[[R1]], #0
+  ; THUMBV7: mov   r[[R0]], r[[R1]]
 }
 
 define i32 @usub_overflow(i32 %a, i32 %b) #0 {
@@ -34,9 +61,26 @@ define i32 @usub_overflow(i32 %a, i32 %b
   ret i32 %2
 
   ; CHECK-LABEL: usub_overflow:
-  ; CHECK: mov r[[R2]], #1
-  ; CHECK: cmp r[[R0]], r[[R1]]
-  ; CHECK: movhs r[[R2]], #0
+
+  ; ARM: subs    r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; ARM: mov     r[[R2:[0-9]+]], #0
+  ; ARM: adc     r[[R0]], r[[R2]], #0
+  ; ARM: rsb     r[[R0]], r[[R0]], #1
+
+  ; THUMBV6: movs    r[[R2:[0-9]+]], #0
+  ; THUMBV6: subs    r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV6: adcs    r[[R2]], r[[R2]]
+  ; THUMBV6: movs    r[[R0]], #1
+  ; THUMBV6: subs    r[[R0]], r[[R0]], r[[R2]]
+
+  ; THUMBV7: subs    r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV7: mov.w   r[[R2:[0-9]+]], #0
+  ; THUMBV7: adc     r[[R0]], r[[R2]], #0
+  ; THUMBV7: rsb.w   r[[R0]], r[[R0]], #1
+
+  ; We should know that the overflow is just 1 bit,
+  ; no need to clear any other bit
+  ; CHECK-NOT: and
 }
 
 define i32 @ssub_overflow(i32 %a, i32 %b) #0 {
@@ -46,9 +90,23 @@ define i32 @ssub_overflow(i32 %a, i32 %b
   ret i32 %2
 
   ; CHECK-LABEL: ssub_overflow:
-  ; CHECK: mov r[[R2]], #1
-  ; CHECK: cmp r[[R0]], r[[R1]]
-  ; CHECK: movvc r[[R2]], #0
+
+  ; ARM: mov r[[R2]], #1
+  ; ARM: cmp r[[R0]], r[[R1]]
+  ; ARM: movvc r[[R2]], #0
+
+  ; THUMBV6: movs    r[[R0]], #0
+  ; THUMBV6: movs    r[[R3:[0-9]+]], #1
+  ; THUMBV6: cmp     r[[R2]], r[[R1:[0-9]+]]
+  ; THUMBV6: bvc     .L[[LABEL:.*]]
+  ; THUMBV6: mov     r[[R0]], r[[R3]]
+  ; THUMBV6: .L[[LABEL]]:
+
+  ; THUMBV7: movs  r[[R2:[0-9]+]], #1
+  ; THUMBV7: cmp   r[[R0:[0-9]+]], r[[R1:[0-9]+]]
+  ; THUMBV7: it    vc
+  ; THUMBV7: movvc r[[R2]], #0
+  ; THUMBV7: mov   r[[R0]], r[[R2]]
 }
 
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1