[llvm] r208435 - Add custom lowering for add/sub with overflow intrinsics to ARM
Reid Kleckner
rnk at google.com
Fri May 9 14:50:35 PDT 2014
This test is failing for me locally and on Takumi's bot:
http://bb.pgr.jp/builders/ninja-x64-msvc-RA-centos6/builds/2593/steps/test_all/logs/LLVM%20%3A%3A%20CodeGen__ARM__intrinsics-overflow.ll
-- Testing: 10688 tests, 32 threads --
FAIL: LLVM :: CodeGen/ARM/intrinsics-overflow.ll (1128 of 10688)
******************** TEST 'LLVM :: CodeGen/ARM/intrinsics-overflow.ll'
FAILED ********************
Script:
--
D:/src/llvm/build/./bin\llc.EXE <
D:\src\llvm\test\CodeGen\ARM\intrinsics-overflow.ll -march=arm
-mcpu=generic | D:/src/llvm/build/./bin\FileCheck.EXE
D:\src\llvm\test\CodeGen\ARM\intrinsics-overflow.ll
--
Exit Code: 2
Command Output (stdout):
--
Command 0: "D:/src/llvm/build/./bin\llc.EXE" "-march=arm" "-mcpu=generic"
Command 0 Result: 1
Command 0 Output:
Command 0 Stderr:
LLVM ERROR: CPU: 'generic' does not support ARM mode execution!
Stack dump:
0. Program arguments: D:/src/llvm/build/./bin\llc.EXE -march=arm
-mcpu=generic
Command 1: "D:/src/llvm/build/./bin\FileCheck.EXE"
"D:\src\llvm\test\CodeGen\ARM\intrinsics-overflow.ll"
Command 1 Result: 2
Command 1 Output:
Command 1 Stderr:
FileCheck error: '-' is empty.
--
On Fri, May 9, 2014 at 10:02 AM, Louis Gerbarg <lgg at apple.com> wrote:
> Author: louis
> Date: Fri May 9 12:02:49 2014
> New Revision: 208435
>
> URL: http://llvm.org/viewvc/llvm-project?rev=208435&view=rev
> Log:
> Add custom lowering for add/sub with overflow intrinsics to ARM
>
> This patch adds support to ARM for custom lowering of the
> llvm.{u|s}add.with.overflow.i32 intrinsics for i32/i64. This is
> particularly useful for handling idiomatic saturating math functions as
> generated by InstCombineCompare.
>
> Test cases included.
>
> rdar://14853450
>
> Added:
> llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll
> Modified:
> llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> llvm/trunk/lib/Target/ARM/ARMISelLowering.h
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=208435&r1=208434&r2=208435&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri May 9 12:02:49 2014
> @@ -639,6 +639,11 @@ ARMTargetLowering::ARMTargetLowering(Tar
> }
> }
>
> + setOperationAction(ISD::SADDO, MVT::i32, Custom);
> + setOperationAction(ISD::UADDO, MVT::i32, Custom);
> + setOperationAction(ISD::SSUBO, MVT::i32, Custom);
> + setOperationAction(ISD::USUBO, MVT::i32, Custom);
> +
> // i64 operation support.
> setOperationAction(ISD::MUL, MVT::i64, Expand);
> setOperationAction(ISD::MULHU, MVT::i32, Expand);
> @@ -3222,11 +3227,96 @@ ARMTargetLowering::duplicateCmp(SDValue
> return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
> }
>
> +std::pair<SDValue, SDValue>
> +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
> + SDValue &ARMcc) const {
> + assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
> +
> + SDValue Value, OverflowCmp;
> + SDValue LHS = Op.getOperand(0);
> + SDValue RHS = Op.getOperand(1);
> +
> +
> + // FIXME: We are currently always generating CMPs because we don't support
> + // generating CMN through the backend. This is not as good as the natural
> + // CMP case because it causes a register dependency and cannot be folded
> + // later.
> +
> + switch (Op.getOpcode()) {
> + default:
> + llvm_unreachable("Unknown overflow instruction!");
> + case ISD::SADDO:
> + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
> + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
> + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value,
> LHS);
> + break;
> + case ISD::UADDO:
> + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
> + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
> + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value,
> LHS);
> + break;
> + case ISD::SSUBO:
> + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
> + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
> + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS,
> RHS);
> + break;
> + case ISD::USUBO:
> + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
> + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
> + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS,
> RHS);
> + break;
> + } // switch (...)
> +
> + return std::make_pair(Value, OverflowCmp);
> +}
> +
> +
> +SDValue
> +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
> + // Let legalize expand this if it isn't a legal type yet.
> + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
> + return SDValue();
> +
> + SDValue Value, OverflowCmp;
> + SDValue ARMcc;
> + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
> + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
> + // We use 0 and 1 as false and true values.
> + SDValue TVal = DAG.getConstant(1, MVT::i32);
> + SDValue FVal = DAG.getConstant(0, MVT::i32);
> + EVT VT = Op.getValueType();
> +
> + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
> + ARMcc, CCR, OverflowCmp);
> +
> + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
> + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
> +}
> +
> +
> SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG)
> const {
> SDValue Cond = Op.getOperand(0);
> SDValue SelectTrue = Op.getOperand(1);
> SDValue SelectFalse = Op.getOperand(2);
> SDLoc dl(Op);
> + unsigned Opc = Cond.getOpcode();
> +
> + if (Cond.getResNo() == 1 &&
> + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
> + Opc == ISD::USUBO)) {
> + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
> + return SDValue();
> +
> + SDValue Value, OverflowCmp;
> + SDValue ARMcc;
> + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
> + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
> + EVT VT = Op.getValueType();
> +
> + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue,
> SelectFalse,
> + ARMcc, CCR, OverflowCmp);
> +
> + }
>
> // Convert:
> //
> @@ -6139,6 +6229,11 @@ SDValue ARMTargetLowering::LowerOperatio
> case ISD::ADDE:
> case ISD::SUBC:
> case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
> + case ISD::SADDO:
> + case ISD::UADDO:
> + case ISD::SSUBO:
> + case ISD::USUBO:
> + return LowerXALUO(Op, DAG);
> case ISD::ATOMIC_LOAD:
> case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
> case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
>
> Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=208435&r1=208434&r2=208435&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
> +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Fri May 9 12:02:49 2014
> @@ -416,6 +416,7 @@ namespace llvm {
> void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT
> PromotedBitwiseVT);
> void addDRTypeForNEON(MVT VT);
> void addQRTypeForNEON(MVT VT);
> + std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG
> &DAG, SDValue &ARMcc) const;
>
> typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
> void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
> @@ -453,6 +454,7 @@ namespace llvm {
> TLSModel::Model model) const;
> SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
> + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
> SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
>
> Added: llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll?rev=208435&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll (added)
> +++ llvm/trunk/test/CodeGen/ARM/intrinsics-overflow.ll Fri May 9 12:02:49
> 2014
> @@ -0,0 +1,57 @@
> +; RUN: llc < %s -march=arm -mcpu=generic | FileCheck %s
> +
> +define i32 @uadd_overflow(i32 %a, i32 %b) #0 {
> + %sadd = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32
> %b)
> + %1 = extractvalue { i32, i1 } %sadd, 1
> + %2 = zext i1 %1 to i32
> + ret i32 %2
> +
> + ; CHECK-LABEL: uadd_overflow:
> + ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
> + ; CHECK: mov r[[R1]], #1
> + ; CHECK: cmp r[[R2]], r[[R0]]
> + ; CHECK: movhs r[[R1]], #0
> +}
> +
> +
> +define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
> + %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32
> %b)
> + %1 = extractvalue { i32, i1 } %sadd, 1
> + %2 = zext i1 %1 to i32
> + ret i32 %2
> +
> + ; CHECK-LABEL: sadd_overflow:
> + ; CHECK: add r[[R2:[0-9]+]], r[[R0:[0-9]+]], r[[R1:[0-9]+]]
> + ; CHECK: mov r[[R1]], #1
> + ; CHECK: cmp r[[R2]], r[[R0]]
> + ; CHECK: movvc r[[R1]], #0
> +}
> +
> +define i32 @usub_overflow(i32 %a, i32 %b) #0 {
> + %sadd = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32
> %b)
> + %1 = extractvalue { i32, i1 } %sadd, 1
> + %2 = zext i1 %1 to i32
> + ret i32 %2
> +
> + ; CHECK-LABEL: usub_overflow:
> + ; CHECK: mov r[[R2]], #1
> + ; CHECK: cmp r[[R0]], r[[R1]]
> + ; CHECK: movhs r[[R2]], #0
> +}
> +
> +define i32 @ssub_overflow(i32 %a, i32 %b) #0 {
> + %sadd = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32
> %b)
> + %1 = extractvalue { i32, i1 } %sadd, 1
> + %2 = zext i1 %1 to i32
> + ret i32 %2
> +
> + ; CHECK-LABEL: ssub_overflow:
> + ; CHECK: mov r[[R2]], #1
> + ; CHECK: cmp r[[R0]], r[[R1]]
> + ; CHECK: movvc r[[R2]], #0
> +}
> +
> +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
> +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #2
> +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #3
> +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #4
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140509/8cc89d43/attachment.html>
More information about the llvm-commits mailing list