[llvm] r313009 - [ARM] Use ADDCARRY / SUBCARRY
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 12 09:25:43 PDT 2017
I'm awfully sorry, but we're seeing the crash from PR34045 again after
this commit.
I've reverted in r313044.
On Tue, Sep 12, 2017 at 12:40 AM, Roger Ferrer Ibanez via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: rogfer01
> Date: Tue Sep 12 00:40:09 2017
> New Revision: 313009
>
> URL: http://llvm.org/viewvc/llvm-project?rev=313009&view=rev
> Log:
> [ARM] Use ADDCARRY / SUBCARRY
>
> This is a preparatory step for D34515 and also is being recommitted as its
> first version caused PR34045.
>
> This change:
> - makes nodes ISD::ADDCARRY and ISD::SUBCARRY legal for i32
> - lowering is done by first converting the boolean value into the carry flag
> using (_, C) ← (ARMISD::ADDC R, -1) and converted back to an integer value
> using (R, _) ← (ARMISD::ADDE 0, 0, C). An ARMISD::ADDE between the two
> operations does the actual addition.
> - for subtraction, given that ISD::SUBCARRY second result is actually a
> borrow, we need to invert the value of the second operand and result before
> and after using ARMISD::SUBE. We need to invert the carry result of
> ARMISD::SUBE to preserve the semantics.
> - given that the generic combiner may lower ISD::ADDCARRY and
> ISD::SUBCARRY into ISD::UADDO and ISD::USUBO we need to update their lowering
> as well otherwise i64 operations now would require branches. This implies
> updating the corresponding test for unsigned.
> - add new combiner to remove the redundant conversions from/to carry flags
> to/from boolean values (ARMISD::ADDC (ARMISD::ADDE 0, 0, C), -1) → C
> - fixes PR34045
>
> Differential Revision: https://reviews.llvm.org/D35192
>
>
> Added:
> llvm/trunk/test/CodeGen/ARM/pr34045.ll
> Modified:
> llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=313009&r1=313008&r2=313009&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Sep 12 00:40:09 2017
> @@ -802,6 +802,9 @@ ARMTargetLowering::ARMTargetLowering(con
> setOperationAction(ISD::SSUBO, MVT::i32, Custom);
> setOperationAction(ISD::USUBO, MVT::i32, Custom);
>
> + setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
> + setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
> +
> // i64 operation support.
> setOperationAction(ISD::MUL, MVT::i64, Expand);
> setOperationAction(ISD::MULHU, MVT::i32, Expand);
> @@ -3953,7 +3956,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue
> }
>
> SDValue
> -ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
> +ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
> // Let legalize expand this if it isn't a legal type yet.
> if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
> return SDValue();
> @@ -3975,6 +3978,66 @@ ARMTargetLowering::LowerXALUO(SDValue Op
> return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
> }
>
> +static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
> + SelectionDAG &DAG) {
> + SDLoc DL(BoolCarry);
> + EVT CarryVT = BoolCarry.getValueType();
> +
> + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
> + // This converts the boolean value carry into the carry flag by doing
> + // ARMISD::ADDC Carry, ~0
> + return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32),
> + BoolCarry, DAG.getConstant(NegOne, DL, CarryVT));
> +}
> +
> +static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
> + SelectionDAG &DAG) {
> + SDLoc DL(Flags);
> +
> + // Now convert the carry flag into a boolean carry. We do this
> + // using ARMISD::ADDE 0, 0, Carry
> + return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
> + DAG.getConstant(0, DL, MVT::i32),
> + DAG.getConstant(0, DL, MVT::i32), Flags);
> +}
> +
> +SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
> + SelectionDAG &DAG) const {
> + // Let legalize expand this if it isn't a legal type yet.
> + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
> + return SDValue();
> +
> + SDValue LHS = Op.getOperand(0);
> + SDValue RHS = Op.getOperand(1);
> + SDLoc dl(Op);
> +
> + EVT VT = Op.getValueType();
> + SDVTList VTs = DAG.getVTList(VT, MVT::i32);
> + SDValue Value;
> + SDValue Overflow;
> + switch (Op.getOpcode()) {
> + default:
> + llvm_unreachable("Unknown overflow instruction!");
> + case ISD::UADDO:
> + Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
> + // Convert the carry flag into a boolean value.
> + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
> + break;
> + case ISD::USUBO: {
> + Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
> + // Convert the carry flag into a boolean value.
> + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
> + // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
> + // value. So compute 1 - C.
> + Overflow = DAG.getNode(ISD::SUB, dl, VTs,
> + DAG.getConstant(1, dl, MVT::i32), Overflow);
> + break;
> + }
> + }
> +
> + return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
> +}
> +
> SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
> SDValue Cond = Op.getOperand(0);
> SDValue SelectTrue = Op.getOperand(1);
> @@ -7380,6 +7443,53 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(
> Op.getOperand(1), Op.getOperand(2));
> }
>
> +static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
> + SDNode *N = Op.getNode();
> + EVT VT = N->getValueType(0);
> + SDVTList VTs = DAG.getVTList(VT, MVT::i32);
> +
> + SDValue Carry = Op.getOperand(2);
> + EVT CarryVT = Carry.getValueType();
> +
> + SDLoc DL(Op);
> +
> + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
> +
> + SDValue Result;
> + if (Op.getOpcode() == ISD::ADDCARRY) {
> + // This converts the boolean value carry into the carry flag.
> + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
> +
> + // Do the addition proper using the carry flag we wanted.
> + Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
> + Op.getOperand(1), Carry.getValue(1));
> +
> + // Now convert the carry flag into a boolean value.
> + Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
> + } else {
> + // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
> + // have to invert the carry first.
> + Carry =
> + DAG.getNode(ISD::SUB, DL, VTs, DAG.getConstant(1, DL, MVT::i32), Carry);
> + // This converts the boolean value carry into the carry flag.
> + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
> +
> + // Do the subtraction proper using the carry flag we wanted.
> + Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
> + Op.getOperand(1), Carry.getValue(1));
> +
> + // Now convert the carry flag into a boolean value.
> + Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
> + // But the carry returned by ARMISD::SUBE is not a borrow as expected
> + // by ISD::SUBCARRY, so compute 1 - C.
> + Carry =
> + DAG.getNode(ISD::SUB, DL, VTs, DAG.getConstant(1, DL, MVT::i32), Carry);
> + }
> +
> + // Return both values.
> + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
> +}
> +
> SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
> assert(Subtarget->isTargetDarwin());
>
> @@ -7734,11 +7844,14 @@ SDValue ARMTargetLowering::LowerOperatio
> case ISD::ADDE:
> case ISD::SUBC:
> case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
> + case ISD::ADDCARRY:
> + case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
> case ISD::SADDO:
> - case ISD::UADDO:
> case ISD::SSUBO:
> + return LowerSignedALUO(Op, DAG);
> + case ISD::UADDO:
> case ISD::USUBO:
> - return LowerXALUO(Op, DAG);
> + return LowerUnsignedALUO(Op, DAG);
> case ISD::ATOMIC_LOAD:
> case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
> case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
> @@ -9687,11 +9800,11 @@ static SDValue AddCombineTo64bitMLAL(SDN
> // a S/UMLAL instruction.
> // UMUL_LOHI
> // / :lo \ :hi
> - // / \ [no multiline comment]
> - // loAdd -> ADDE |
> - // \ :glue /
> - // \ /
> - // ADDC <- hiAdd
> + // V \ [no multiline comment]
> + // loAdd -> ADDC |
> + // \ :carry /
> + // V V
> + // ADDE <- hiAdd
> //
> assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE");
>
> @@ -9699,7 +9812,7 @@ static SDValue AddCombineTo64bitMLAL(SDN
> AddeNode->getOperand(2).getValueType() == MVT::i32 &&
> "ADDE node has the wrong inputs");
>
> - // Check that we have a glued ADDC node.
> + // Check that we are chained to the right ADDC node.
> SDNode* AddcNode = AddeNode->getOperand(2).getNode();
> if (AddcNode->getOpcode() != ARMISD::ADDC)
> return SDValue();
> @@ -9750,7 +9863,7 @@ static SDValue AddCombineTo64bitMLAL(SDN
> SDValue* LoMul = nullptr;
> SDValue* LowAdd = nullptr;
>
> - // Ensure that ADDE is from high result of ISD::SMUL_LOHI.
> + // Ensure that ADDE is from high result of ISD::xMUL_LOHI.
> if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1)))
> return SDValue();
>
> @@ -9775,6 +9888,11 @@ static SDValue AddCombineTo64bitMLAL(SDN
> if (!LoMul)
> return SDValue();
>
> + // If HiAdd is a predecessor of ADDC, the replacement below will create a
> + // cycle.
> + if (AddcNode->isPredecessorOf(HiAdd->getNode()))
> + return SDValue();
> +
> // Create the merged node.
> SelectionDAG &DAG = DCI.DAG;
>
> @@ -9877,8 +9995,22 @@ static SDValue PerformUMLALCombine(SDNod
> return SDValue();
> }
>
> -static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG,
> +static SDValue PerformAddcSubcCombine(SDNode *N,
> + TargetLowering::DAGCombinerInfo &DCI,
> const ARMSubtarget *Subtarget) {
> + SelectionDAG &DAG(DCI.DAG);
> +
> + if (N->getOpcode() == ARMISD::ADDC) {
> + // (ADDC (ADDE 0, 0, C), -1) -> C
> + SDValue LHS = N->getOperand(0);
> + SDValue RHS = N->getOperand(1);
> + if (LHS->getOpcode() == ARMISD::ADDE &&
> + isNullConstant(LHS->getOperand(0)) &&
> + isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
> + return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
> + }
> + }
> +
> if (Subtarget->isThumb1Only()) {
> SDValue RHS = N->getOperand(1);
> if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
> @@ -11767,6 +11899,14 @@ static SDValue PerformExtendCombine(SDNo
> return SDValue();
> }
>
> +static const APInt *isPowerOf2Constant(SDValue V) {
> + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
> + if (!C)
> + return nullptr;
> + const APInt *CV = &C->getAPIntValue();
> + return CV->isPowerOf2() ? CV : nullptr;
> +}
> +
> SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
> // If we have a CMOV, OR and AND combination such as:
> // if (x & CN)
> @@ -11795,8 +11935,8 @@ SDValue ARMTargetLowering::PerformCMOVTo
> SDValue And = CmpZ->getOperand(0);
> if (And->getOpcode() != ISD::AND)
> return SDValue();
> - ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1));
> - if (!AndC || !AndC->getAPIntValue().isPowerOf2())
> + const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
> + if (!AndC)
> return SDValue();
> SDValue X = And->getOperand(0);
>
> @@ -11836,7 +11976,7 @@ SDValue ARMTargetLowering::PerformCMOVTo
> SDValue V = Y;
> SDLoc dl(X);
> EVT VT = X.getValueType();
> - unsigned BitInX = AndC->getAPIntValue().logBase2();
> + unsigned BitInX = AndC->logBase2();
>
> if (BitInX != 0) {
> // We must shift X first.
> @@ -11997,7 +12137,7 @@ SDValue ARMTargetLowering::PerformDAGCom
> case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
> case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
> case ARMISD::ADDC:
> - case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget);
> + case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
> case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget);
> case ARMISD::BFI: return PerformBFICombine(N, DCI);
> case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
> @@ -12693,10 +12833,17 @@ void ARMTargetLowering::computeKnownBits
> case ARMISD::ADDE:
> case ARMISD::SUBC:
> case ARMISD::SUBE:
> - // These nodes' second result is a boolean
> - if (Op.getResNo() == 0)
> - break;
> - Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
> + // Special cases when we convert a carry to a boolean.
> + if (Op.getResNo() == 0) {
> + SDValue LHS = Op.getOperand(0);
> + SDValue RHS = Op.getOperand(1);
> + // (ADDE 0, 0, C) will give us a single bit.
> + if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
> + isNullConstant(RHS)) {
> + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
> + return;
> + }
> + }
> break;
> case ARMISD::CMOV: {
> // Bits are known zero/one if known on the LHS and RHS.
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=313009&r1=313008&r2=313009&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue Sep 12 00:40:09 2017
> @@ -625,7 +625,8 @@ class InstrItineraryData;
> SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
> - SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
> + SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
> + SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
>
> Modified: llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll?rev=313009&r1=313008&r2=313009&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll Tue Sep 12 00:40:09 2017
> @@ -1,4 +1,6 @@
> -; RUN: llc < %s -mtriple=arm-linux -mcpu=generic | FileCheck %s
> +; RUN: llc < %s -mtriple=arm-linux -mcpu=generic -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
> +; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV6
> +; RUN: llc < %s -mtriple=thumbv7-eabi -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=THUMBV7
>
> define i32 @uadd_overflow(i32 %a, i32 %b) #0 {
> %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
> @@ -7,10 +9,19 @@ define i32 @uadd_overflow(i32 %a, i32 %b
> ret i32 %2
>
> ; CHECK-LABEL: uadd_overflow:
> - ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
> - ; CHECK: mov r[[R1]], #1
> - ; CHECK: cmp r[[R2]], r[[R0]]
> - ; CHECK: movhs r[[R1]], #0
> +
> + ; ARM: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
> + ; ARM: mov r[[R2:[0-9]+]], #0
> + ; ARM: adc r[[R0]], r[[R2]], #0
> +
> + ; THUMBV6: movs r[[R2:[0-9]+]], #0
> + ; THUMBV6: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
> + ; THUMBV6: adcs r[[R2]], r[[R2]]
> + ; THUMBV6: mov r[[R0]], r[[R2]]
> +
> + ; THUMBV7: adds r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
> + ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
> + ; THUMBV7: adc r[[R0]], r[[R2]], #0
> }
>
>
> @@ -21,10 +32,26 @@ define i32 @sadd_overflow(i32 %a, i32 %b
> ret i32 %2
>
> ; CHECK-LABEL: sadd_overflow:
> - ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
> - ; CHECK: mov r[[R1]], #1
> - ; CHECK: cmp r[[R2]], r[[R0]]
> - ; CHECK: movvc r[[R1]], #0
> +
> + ; ARM: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
> + ; ARM: mov r[[R1]], #1
> + ; ARM: cmp r[[R2]], r[[R0]]
> + ; ARM: movvc r[[R1]], #0
> +
> + ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
> + ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
> + ; THUMBV6: movs r[[R0]], #0
> + ; THUMBV6: movs r[[R1]], #1
> + ; THUMBV6: cmp r[[R3]], r[[R2]]
> + ; THUMBV6: bvc .L[[LABEL:.*]]
> + ; THUMBV6: mov r[[R0]], r[[R1]]
> + ; THUMBV6: .L[[LABEL]]:
> +
> + ; THUMBV7: movs r[[R1]], #1
> + ; THUMBV7: cmp r[[R2]], r[[R0]]
> + ; THUMBV7: it vc
> + ; THUMBV7: movvc r[[R1]], #0
> + ; THUMBV7: mov r[[R0]], r[[R1]]
> }
>
> define i32 @usub_overflow(i32 %a, i32 %b) #0 {
> @@ -34,9 +61,26 @@ define i32 @usub_overflow(i32 %a, i32 %b
> ret i32 %2
>
> ; CHECK-LABEL: usub_overflow:
> - ; CHECK: mov r[[R2]], #1
> - ; CHECK: cmp r[[R0]], r[[R1]]
> - ; CHECK: movhs r[[R2]], #0
> +
> + ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
> + ; ARM: mov r[[R2:[0-9]+]], #0
> + ; ARM: adc r[[R0]], r[[R2]], #0
> + ; ARM: rsb r[[R0]], r[[R0]], #1
> +
> + ; THUMBV6: movs r[[R2:[0-9]+]], #0
> + ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
> + ; THUMBV6: adcs r[[R2]], r[[R2]]
> + ; THUMBV6: movs r[[R0]], #1
> + ; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]]
> +
> + ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
> + ; THUMBV7: mov.w r[[R2:[0-9]+]], #0
> + ; THUMBV7: adc r[[R0]], r[[R2]], #0
> + ; THUMBV7: rsb.w r[[R0]], r[[R0]], #1
> +
> + ; We should know that the overflow is just 1 bit,
> + ; no need to clear any other bit
> + ; CHECK-NOT: and
> }
>
> define i32 @ssub_overflow(i32 %a, i32 %b) #0 {
> @@ -46,9 +90,23 @@ define i32 @ssub_overflow(i32 %a, i32 %b
> ret i32 %2
>
> ; CHECK-LABEL: ssub_overflow:
> - ; CHECK: mov r[[R2]], #1
> - ; CHECK: cmp r[[R0]], r[[R1]]
> - ; CHECK: movvc r[[R2]], #0
> +
> + ; ARM: mov r[[R2]], #1
> + ; ARM: cmp r[[R0]], r[[R1]]
> + ; ARM: movvc r[[R2]], #0
> +
> + ; THUMBV6: movs r[[R0]], #0
> + ; THUMBV6: movs r[[R3:[0-9]+]], #1
> + ; THUMBV6: cmp r[[R2]], r[[R1:[0-9]+]]
> + ; THUMBV6: bvc .L[[LABEL:.*]]
> + ; THUMBV6: mov r[[R0]], r[[R3]]
> + ; THUMBV6: .L[[LABEL]]:
> +
> + ; THUMBV7: movs r[[R2:[0-9]+]], #1
> + ; THUMBV7: cmp r[[R0:[0-9]+]], r[[R1:[0-9]+]]
> + ; THUMBV7: it vc
> + ; THUMBV7: movvc r[[R2]], #0
> + ; THUMBV7: mov r[[R0]], r[[R2]]
> }
>
> declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
>
> Added: llvm/trunk/test/CodeGen/ARM/pr34045.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/pr34045.ll?rev=313009&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/pr34045.ll (added)
> +++ llvm/trunk/test/CodeGen/ARM/pr34045.ll Tue Sep 12 00:40:09 2017
> @@ -0,0 +1,53 @@
> +; RUN: llc < %s -mtriple thumbv7 | FileCheck %s
> +
> +; ModuleID = 'bugpoint-reduced-simplified.bc'
> +define hidden void @bn_mul_comba8(i32* nocapture %r, i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr {
> +entry:
> +; This test is actually checking that no cycle is introduced but at least we
> +; want to see a couple of umull and one umlal in the output
> +; CHECK: umull
> +; CHECK: umull
> +; CHECK: umlal
> + %0 = load i32, i32* %a, align 4
> + %conv = zext i32 %0 to i64
> + %1 = load i32, i32* %b, align 4
> + %conv2 = zext i32 %1 to i64
> + %mul = mul nuw i64 %conv2, %conv
> + %shr = lshr i64 %mul, 32
> + %2 = load i32, i32* %a, align 4
> + %conv13 = zext i32 %2 to i64
> + %3 = load i32, i32* undef, align 4
> + %conv15 = zext i32 %3 to i64
> + %mul16 = mul nuw i64 %conv15, %conv13
> + %add18 = add i64 %mul16, %shr
> + %shr20 = lshr i64 %add18, 32
> + %conv21 = trunc i64 %shr20 to i32
> + %4 = load i32, i32* undef, align 4
> + %conv34 = zext i32 %4 to i64
> + %5 = load i32, i32* %b, align 4
> + %conv36 = zext i32 %5 to i64
> + %mul37 = mul nuw i64 %conv36, %conv34
> + %conv38 = and i64 %add18, 4294967295
> + %add39 = add i64 %mul37, %conv38
> + %shr41 = lshr i64 %add39, 32
> + %conv42 = trunc i64 %shr41 to i32
> + %add43 = add i32 %conv42, %conv21
> + %cmp44 = icmp ult i32 %add43, %conv42
> + %c1.1 = zext i1 %cmp44 to i32
> + %add65 = add i32 0, %c1.1
> + %add86 = add i32 %add65, 0
> + %add107 = add i32 %add86, 0
> + %conv124 = zext i32 %add107 to i64
> + %add125 = add i64 0, %conv124
> + %conv145 = and i64 %add125, 4294967295
> + %add146 = add i64 %conv145, 0
> + %conv166 = and i64 %add146, 4294967295
> + %add167 = add i64 %conv166, 0
> + %conv187 = and i64 %add167, 4294967295
> + %add188 = add i64 %conv187, 0
> + %conv189 = trunc i64 %add188 to i32
> + %arrayidx200 = getelementptr inbounds i32, i32* %r, i32 3
> + store i32 %conv189, i32* %arrayidx200, align 4
> + ret void
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list