[llvm] r302126 - [ARM] ACLE Chapter 9 intrinsics
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Thu May 4 00:31:28 PDT 2017
Author: sam_parker
Date: Thu May 4 02:31:28 2017
New Revision: 302126
URL: http://llvm.org/viewvc/llvm-project?rev=302126&view=rev
Log:
[ARM] ACLE Chapter 9 intrinsics
Added the integer data processing intrinsics from ACLE v2.1 Chapter 9;
the saturation_occurred intrinsics have been left out for now. The
intrinsics for instructions that read or write the GE bits are given a
chain so that their ordering is preserved, and the only instruction that
reads these flags (sel) can only be selected through its intrinsic.
Differential Revision: https://reviews.llvm.org/D32281
Added:
llvm/trunk/test/CodeGen/ARM/acle-intrinsics-v5.ll
llvm/trunk/test/CodeGen/ARM/acle-intrinsics.ll
Removed:
llvm/trunk/test/CodeGen/ARM/sat-arith.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsARM.td
llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
llvm/trunk/lib/Target/ARM/ARMISelLowering.h
llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
Modified: llvm/trunk/include/llvm/IR/IntrinsicsARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsARM.td?rev=302126&r1=302125&r2=302126&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsARM.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsARM.td Thu May 4 02:31:28 2017
@@ -22,12 +22,26 @@ let TargetPrefix = "arm" in { // All in
// and return value are essentially chains, used to force ordering during ISel.
def int_arm_space : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+// 16-bit multiplications
+def int_arm_smulbb : GCCBuiltin<"__builtin_arm_smulbb">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smulbt : GCCBuiltin<"__builtin_arm_smulbt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smultb : GCCBuiltin<"__builtin_arm_smultb">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smultt : GCCBuiltin<"__builtin_arm_smultt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smulwb : GCCBuiltin<"__builtin_arm_smulwb">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smulwt : GCCBuiltin<"__builtin_arm_smulwt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// Saturating Arithmetic
def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ [Commutative, IntrNoMem]>;
def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">,
@@ -35,6 +49,176 @@ def int_arm_ssat : GCCBuiltin<"__builtin
def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+// Accumulating multiplications
+def int_arm_smlabb : GCCBuiltin<"__builtin_arm_smlabb">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlabt : GCCBuiltin<"__builtin_arm_smlabt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlatb : GCCBuiltin<"__builtin_arm_smlatb">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlatt : GCCBuiltin<"__builtin_arm_smlatt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlawb : GCCBuiltin<"__builtin_arm_smlawb">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlawt : GCCBuiltin<"__builtin_arm_smlawt">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+// Parallel 16-bit saturation
+def int_arm_ssat16 : GCCBuiltin<"__builtin_arm_ssat16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_usat16 : GCCBuiltin<"__builtin_arm_usat16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+// Packing and unpacking
+def int_arm_sxtab16 : GCCBuiltin<"__builtin_arm_sxtab16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_sxtb16 : GCCBuiltin<"__builtin_arm_sxtb16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uxtab16 : GCCBuiltin<"__builtin_arm_uxtab16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uxtb16 : GCCBuiltin<"__builtin_arm_uxtb16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+// Parallel selection, reads the GE flags.
+def int_arm_sel : GCCBuiltin<"__builtin_arm_sel">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
+
+// Parallel 8-bit addition and subtraction
+def int_arm_qadd8 : GCCBuiltin<"__builtin_arm_qadd8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_qsub8 : GCCBuiltin<"__builtin_arm_qsub8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+// Writes to the GE bits.
+def int_arm_sadd8 : GCCBuiltin<"__builtin_arm_sadd8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_shadd8 : GCCBuiltin<"__builtin_arm_shadd8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_shsub8 : GCCBuiltin<"__builtin_arm_shsub8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+// Writes to the GE bits.
+def int_arm_ssub8 : GCCBuiltin<"__builtin_arm_ssub8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+// Writes to the GE bits.
+def int_arm_uadd8 : GCCBuiltin<"__builtin_arm_uadd8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_uhadd8 : GCCBuiltin<"__builtin_arm_uhadd8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uhsub8 : GCCBuiltin<"__builtin_arm_uhsub8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uqadd8 : GCCBuiltin<"__builtin_arm_uqadd8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uqsub8 : GCCBuiltin<"__builtin_arm_uqsub8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+// Writes to the GE bits.
+def int_arm_usub8 : GCCBuiltin<"__builtin_arm_usub8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+// Sum of 8-bit absolute differences
+def int_arm_usad8 : GCCBuiltin<"__builtin_arm_usad8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_usada8 : GCCBuiltin<"__builtin_arm_usada8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+// Parallel 16-bit addition and subtraction
+def int_arm_qadd16 : GCCBuiltin<"__builtin_arm_qadd16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_qasx : GCCBuiltin<"__builtin_arm_qasx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_qsax : GCCBuiltin<"__builtin_arm_qsax">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_qsub16 : GCCBuiltin<"__builtin_arm_qsub16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+// Writes to the GE bits.
+def int_arm_sadd16 : GCCBuiltin<"__builtin_arm_sadd16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+// Writes to the GE bits.
+def int_arm_sasx : GCCBuiltin<"__builtin_arm_sasx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_shadd16 : GCCBuiltin<"__builtin_arm_shadd16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_shasx : GCCBuiltin<"__builtin_arm_shasx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_shsax : GCCBuiltin<"__builtin_arm_shsax">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_shsub16 : GCCBuiltin<"__builtin_arm_shsub16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+// Writes to the GE bits.
+def int_arm_ssax : GCCBuiltin<"__builtin_arm_ssax">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+// Writes to the GE bits.
+def int_arm_ssub16 : GCCBuiltin<"__builtin_arm_ssub16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+// Writes to the GE bits.
+def int_arm_uadd16 : GCCBuiltin<"__builtin_arm_uadd16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+// Writes to the GE bits.
+def int_arm_uasx : GCCBuiltin<"__builtin_arm_uasx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_arm_uhadd16 : GCCBuiltin<"__builtin_arm_uhadd16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uhasx : GCCBuiltin<"__builtin_arm_uhasx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uhsax : GCCBuiltin<"__builtin_arm_uhsax">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uhsub16 : GCCBuiltin<"__builtin_arm_uhsub16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uqadd16 : GCCBuiltin<"__builtin_arm_uqadd16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uqasx : GCCBuiltin<"__builtin_arm_uqasx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uqsax : GCCBuiltin<"__builtin_arm_uqsax">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_uqsub16 : GCCBuiltin<"__builtin_arm_uqsub16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+// Writes to the GE bits.
+def int_arm_usax : GCCBuiltin<"__builtin_arm_usax">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+// Writes to the GE bits.
+def int_arm_usub16 : GCCBuiltin<"__builtin_arm_usub16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
+
+// Parallel 16-bit multiplication
+def int_arm_smlad : GCCBuiltin<"__builtin_arm_smlad">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smladx : GCCBuiltin<"__builtin_arm_smladx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlald : GCCBuiltin<"__builtin_arm_smlald">,
+ Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+def int_arm_smlaldx : GCCBuiltin<"__builtin_arm_smlaldx">,
+ Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+def int_arm_smlsd : GCCBuiltin<"__builtin_arm_smlsd">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlsdx : GCCBuiltin<"__builtin_arm_smlsdx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+def int_arm_smlsld : GCCBuiltin<"__builtin_arm_smlsld">,
+ Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+def int_arm_smlsldx : GCCBuiltin<"__builtin_arm_smlsldx">,
+ Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+def int_arm_smuad : GCCBuiltin<"__builtin_arm_smuad">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smuadx : GCCBuiltin<"__builtin_arm_smuadx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smusd : GCCBuiltin<"__builtin_arm_smusd">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_arm_smusdx : GCCBuiltin<"__builtin_arm_smusdx">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+
//===----------------------------------------------------------------------===//
// Load, Store and Clear exclusive
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=302126&r1=302125&r2=302126&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu May 4 02:31:28 2017
@@ -822,6 +822,7 @@ ARMTargetLowering::ARMTargetLowering(con
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i64, Custom);
setOperationAction(ISD::SRA, MVT::i64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
@@ -1344,6 +1345,10 @@ const char *ARMTargetLowering::getTarget
case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
case ARMISD::SMULWB: return "ARMISD::SMULWB";
case ARMISD::SMULWT: return "ARMISD::SMULWT";
+ case ARMISD::SMLALD: return "ARMISD::SMLALD";
+ case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
+ case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
+ case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
@@ -7722,6 +7727,37 @@ SDValue ARMTargetLowering::LowerOperatio
}
}
+static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned Opc = 0;
+ if (IntNo == Intrinsic::arm_smlald)
+ Opc = ARMISD::SMLALD;
+ else if (IntNo == Intrinsic::arm_smlaldx)
+ Opc = ARMISD::SMLALDX;
+ else if (IntNo == Intrinsic::arm_smlsld)
+ Opc = ARMISD::SMLSLD;
+ else if (IntNo == Intrinsic::arm_smlsldx)
+ Opc = ARMISD::SMLSLDX;
+ else
+ return;
+
+ SDLoc dl(N);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(3),
+ DAG.getConstant(0, dl, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+ N->getOperand(3),
+ DAG.getConstant(1, dl, MVT::i32));
+
+ SDValue LongMul = DAG.getNode(Opc, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ N->getOperand(1), N->getOperand(2),
+ Lo, Hi);
+ Results.push_back(LongMul.getValue(0));
+ Results.push_back(LongMul.getValue(1));
+}
+
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
@@ -7763,6 +7799,8 @@ void ARMTargetLowering::ReplaceNodeResul
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_64Results(N, Results, DAG);
return;
+ case ISD::INTRINSIC_WO_CHAIN:
+ return ReplaceLongIntrinsic(N, Results, DAG);
}
if (Res.getNode())
Results.push_back(Res);
Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=302126&r1=302125&r2=302126&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Thu May 4 02:31:28 2017
@@ -184,6 +184,10 @@ class InstrItineraryData;
SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
SMLALTT, // 64-bit signed accumulate multiply top, top 16
+ SMLALD, // Signed multiply accumulate long dual
+ SMLALDX, // Signed multiply accumulate long dual exchange
+ SMLSLD, // Signed multiply subtract long dual
+ SMLSLDX, // Signed multiply subtract long dual exchange
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
Modified: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrInfo.td?rev=302126&r1=302125&r2=302126&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td Thu May 4 02:31:28 2017
@@ -99,6 +99,11 @@ def SDT_LongMac : SDTypeProfile<2, 4, [
SDTCisSameAs<0, 4>,
SDTCisSameAs<0, 5>]>;
+def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
+def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
+def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
+def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
@@ -870,7 +875,9 @@ def imm1_16_XFORM: SDNodeXForm<imm, [{
MVT::i32);
}]>;
def Imm1_16AsmOperand: ImmAsmOperandMinusOne<1,16> { let Name = "Imm1_16"; }
-def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
+def imm1_16 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm > 0 && Imm <= 16;
+ }],
imm1_16_XFORM> {
let PrintMethod = "printImmPlusOneOperand";
let ParserMatchClass = Imm1_16AsmOperand;
@@ -1983,7 +1990,9 @@ def : InstAlias<"sevl$p", (HINT 5, pred:
def : InstAlias<"esb$p", (HINT 16, pred:$p)>, Requires<[IsARM, HasRAS]>;
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
- "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
+ "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>,
+ Requires<[IsARM, HasV6]> {
bits<4> Rd;
bits<4> Rn;
bits<4> Rm;
@@ -3472,8 +3481,12 @@ def : ARMV6Pat<(add rGPR:$Rn, (sext_inre
(SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
+def : ARMV6Pat<(int_arm_sxtb16 GPR:$Src),
+ (SXTB16 GPR:$Src, 0)>;
def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
+def : ARMV6Pat<(int_arm_sxtab16 GPR:$LHS, GPR:$RHS),
+ (SXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
// Zero extenders
@@ -3493,6 +3506,8 @@ def UXTB16 : AI_ext_rrot<0b01101100,
// (UXTB16r_rot GPR:$Src, 3)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16 GPR:$Src, 1)>;
+def : ARMV6Pat<(int_arm_uxtb16 GPR:$Src),
+ (UXTB16 GPR:$Src, 0)>;
def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -3507,6 +3522,8 @@ def : ARMV6Pat<(add rGPR:$Rn, (and (srl
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
def UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
+def : ARMV6Pat<(int_arm_uxtab16 GPR:$LHS, GPR:$RHS),
+ (UXTAB16 GPR:$LHS, GPR:$RHS, 0)>;
def SBFX : I<(outs GPRnopc:$Rd),
@@ -3633,71 +3650,85 @@ class AAI<bits<8> op27_20, bits<8> op11_
let Unpredictable{11-8} = 0b1111;
}
-// Saturating add/subtract
+// Wrappers around the AAI class
+class AAIRevOpr<bits<8> op27_20, bits<8> op11_4, string opc,
+ list<dag> pattern = []>
+ : AAI<op27_20, op11_4, opc,
+ pattern,
+ (ins GPRnopc:$Rm, GPRnopc:$Rn),
+ "\t$Rd, $Rm, $Rn">;
+
+class AAIIntrinsic<bits<8> op27_20, bits<8> op11_4, string opc,
+ Intrinsic intrinsic>
+ : AAI<op27_20, op11_4, opc,
+ [(set GPRnopc:$Rd, (intrinsic GPRnopc:$Rn, GPRnopc:$Rm))]>;
+// Saturating add/subtract
+let hasSideEffects = 1 in {
+def QADD8 : AAIIntrinsic<0b01100010, 0b11111001, "qadd8", int_arm_qadd8>;
+def QADD16 : AAIIntrinsic<0b01100010, 0b11110001, "qadd16", int_arm_qadd16>;
+def QSUB16 : AAIIntrinsic<0b01100010, 0b11110111, "qsub16", int_arm_qsub16>;
+def QSUB8 : AAIIntrinsic<0b01100010, 0b11111111, "qsub8", int_arm_qsub8>;
+
+def QDADD : AAIRevOpr<0b00010100, 0b00000101, "qdadd",
+ [(set GPRnopc:$Rd, (int_arm_qadd (int_arm_qadd GPRnopc:$Rm,
+ GPRnopc:$Rm),
+ GPRnopc:$Rn))]>;
+def QDSUB : AAIRevOpr<0b00010110, 0b00000101, "qdsub",
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm,
+ (int_arm_qadd GPRnopc:$Rn, GPRnopc:$Rn)))]>;
+def QSUB : AAIRevOpr<0b00010010, 0b00000101, "qsub",
+ [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))]>;
let DecoderMethod = "DecodeQADDInstruction" in
-def QADD : AAI<0b00010000, 0b00000101, "qadd",
- [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))],
- (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
-
-def QSUB : AAI<0b00010010, 0b00000101, "qsub",
- [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))],
- (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">;
-def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [],
- (ins GPRnopc:$Rm, GPRnopc:$Rn),
- "\t$Rd, $Rm, $Rn">;
-def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [],
- (ins GPRnopc:$Rm, GPRnopc:$Rn),
- "\t$Rd, $Rm, $Rn">;
-
-def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
-def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
-def QASX : AAI<0b01100010, 0b11110011, "qasx">;
-def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
-def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
-def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
-def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
-def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
-def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
-def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
-def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
-def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
+ def QADD : AAIRevOpr<0b00010000, 0b00000101, "qadd",
+ [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))]>;
+}
+
+def UQADD16 : AAIIntrinsic<0b01100110, 0b11110001, "uqadd16", int_arm_uqadd16>;
+def UQADD8 : AAIIntrinsic<0b01100110, 0b11111001, "uqadd8", int_arm_uqadd8>;
+def UQSUB16 : AAIIntrinsic<0b01100110, 0b11110111, "uqsub16", int_arm_uqsub16>;
+def UQSUB8 : AAIIntrinsic<0b01100110, 0b11111111, "uqsub8", int_arm_uqsub8>;
+def QASX : AAIIntrinsic<0b01100010, 0b11110011, "qasx", int_arm_qasx>;
+def QSAX : AAIIntrinsic<0b01100010, 0b11110101, "qsax", int_arm_qsax>;
+def UQASX : AAIIntrinsic<0b01100110, 0b11110011, "uqasx", int_arm_uqasx>;
+def UQSAX : AAIIntrinsic<0b01100110, 0b11110101, "uqsax", int_arm_uqsax>;
// Signed/Unsigned add/subtract
-def SASX : AAI<0b01100001, 0b11110011, "sasx">;
-def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
-def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
-def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
-def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
-def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
-def UASX : AAI<0b01100101, 0b11110011, "uasx">;
-def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
-def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
-def USAX : AAI<0b01100101, 0b11110101, "usax">;
-def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
-def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
+def SASX : AAIIntrinsic<0b01100001, 0b11110011, "sasx", int_arm_sasx>;
+def SADD16 : AAIIntrinsic<0b01100001, 0b11110001, "sadd16", int_arm_sadd16>;
+def SADD8 : AAIIntrinsic<0b01100001, 0b11111001, "sadd8", int_arm_sadd8>;
+def SSAX : AAIIntrinsic<0b01100001, 0b11110101, "ssax", int_arm_ssax>;
+def SSUB16 : AAIIntrinsic<0b01100001, 0b11110111, "ssub16", int_arm_ssub16>;
+def SSUB8 : AAIIntrinsic<0b01100001, 0b11111111, "ssub8", int_arm_ssub8>;
+def UASX : AAIIntrinsic<0b01100101, 0b11110011, "uasx", int_arm_uasx>;
+def UADD16 : AAIIntrinsic<0b01100101, 0b11110001, "uadd16", int_arm_uadd16>;
+def UADD8 : AAIIntrinsic<0b01100101, 0b11111001, "uadd8", int_arm_uadd8>;
+def USAX : AAIIntrinsic<0b01100101, 0b11110101, "usax", int_arm_usax>;
+def USUB16 : AAIIntrinsic<0b01100101, 0b11110111, "usub16", int_arm_usub16>;
+def USUB8 : AAIIntrinsic<0b01100101, 0b11111111, "usub8", int_arm_usub8>;
// Signed/Unsigned halving add/subtract
-def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
-def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
-def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
-def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
-def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
-def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
-def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
-def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
-def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
-def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
-def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
-def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
+def SHASX : AAIIntrinsic<0b01100011, 0b11110011, "shasx", int_arm_shasx>;
+def SHADD16 : AAIIntrinsic<0b01100011, 0b11110001, "shadd16", int_arm_shadd16>;
+def SHADD8 : AAIIntrinsic<0b01100011, 0b11111001, "shadd8", int_arm_shadd8>;
+def SHSAX : AAIIntrinsic<0b01100011, 0b11110101, "shsax", int_arm_shsax>;
+def SHSUB16 : AAIIntrinsic<0b01100011, 0b11110111, "shsub16", int_arm_shsub16>;
+def SHSUB8 : AAIIntrinsic<0b01100011, 0b11111111, "shsub8", int_arm_shsub8>;
+def UHASX : AAIIntrinsic<0b01100111, 0b11110011, "uhasx", int_arm_uhasx>;
+def UHADD16 : AAIIntrinsic<0b01100111, 0b11110001, "uhadd16", int_arm_uhadd16>;
+def UHADD8 : AAIIntrinsic<0b01100111, 0b11111001, "uhadd8", int_arm_uhadd8>;
+def UHSAX : AAIIntrinsic<0b01100111, 0b11110101, "uhsax", int_arm_uhsax>;
+def UHSUB16 : AAIIntrinsic<0b01100111, 0b11110111, "uhsub16", int_arm_uhsub16>;
+def UHSUB8 : AAIIntrinsic<0b01100111, 0b11111111, "uhsub8", int_arm_uhsub8>;
// Unsigned Sum of Absolute Differences [and Accumulate].
def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
- "\t$Rd, $Rn, $Rm", []>,
+ "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (int_arm_usad8 GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> {
bits<4> Rd;
bits<4> Rn;
@@ -3711,7 +3742,8 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR
}
def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
MulFrm /* for convenience */, NoItinerary, "usada8",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
+ "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (int_arm_usada8 GPR:$Rn, GPR:$Rm, GPR:$Ra))]>,
Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{
bits<4> Rd;
bits<4> Rn;
@@ -3726,7 +3758,6 @@ def USADA8 : AI<(outs GPR:$Rd), (ins GPR
}
// Signed/Unsigned saturate
-
def SSAT : AI<(outs GPRnopc:$Rd),
(ins imm1_32:$sat_imm, GPRnopc:$Rn, shift_imm:$sh),
SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh", []>,
@@ -3795,6 +3826,10 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a,
(USAT imm0_31:$pos, GPRnopc:$a, 0)>;
def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
(SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
+def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos),
+ (SSAT16 imm1_16:$pos, GPRnopc:$a)>;
+def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos),
+ (USAT16 imm0_15:$pos, GPRnopc:$a)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -4220,8 +4255,8 @@ multiclass AI_smla<string opc> {
IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set GPRnopc:$Rd,
(add GPR:$Ra, (ARMsmulwt GPRnopc:$Rn, GPRnopc:$Rm)))]>,
- Requires<[IsARM, HasV5TE, UseMulOps]>,
- Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
+ Requires<[IsARM, HasV5TE, UseMulOps]>,
+ Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
}
}
@@ -4255,7 +4290,8 @@ def : ARMV5TEPat<(ARMsmlaltt GPR:$Rn, GP
// Helper class for AI_smld.
class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
InstrItinClass itin, string opc, string asm>
- : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>,
+ Requires<[IsARM, HasV6]> {
bits<4> Rn;
bits<4> Rm;
let Inst{27-23} = 0b01110;
@@ -4305,20 +4341,40 @@ multiclass AI_smld<bit sub, string opc>
Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ NoItinerary,
!strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
- (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
+ (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ NoItinerary,
!strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
-
}
defm SMLA : AI_smld<0, "smla">;
defm SMLS : AI_smld<1, "smls">;
+def : ARMV6Pat<(int_arm_smlad GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLAD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(int_arm_smladx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLADX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(int_arm_smlsd GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLSD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(int_arm_smlsdx GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
+ (SMLSDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra)>;
+def : ARMV6Pat<(ARMSmlald GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLALD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+def : ARMV6Pat<(ARMSmlaldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLALDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+def : ARMV6Pat<(ARMSmlsld GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLSLD GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+def : ARMV6Pat<(ARMSmlsldx GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi),
+ (SMLSLDX GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$RLo, GPRnopc:$RHi)>;
+
multiclass AI_sdml<bit sub, string opc> {
def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
@@ -4332,6 +4388,15 @@ multiclass AI_sdml<bit sub, string opc>
defm SMUA : AI_sdml<0, "smua">;
defm SMUS : AI_sdml<1, "smus">;
+def : ARMV6Pat<(int_arm_smuad GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUAD GPRnopc:$Rn, GPRnopc:$Rm)>;
+def : ARMV6Pat<(int_arm_smuadx GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUADX GPRnopc:$Rn, GPRnopc:$Rm)>;
+def : ARMV6Pat<(int_arm_smusd GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUSD GPRnopc:$Rn, GPRnopc:$Rm)>;
+def : ARMV6Pat<(int_arm_smusdx GPRnopc:$Rn, GPRnopc:$Rm),
+ (SMUSDX GPRnopc:$Rn, GPRnopc:$Rm)>;
+
//===----------------------------------------------------------------------===//
// Division Instructions (ARMv7-A with virtualization extension)
//
@@ -5648,6 +5713,32 @@ def : ARMV5MOPat<(add GPR:$acc,
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
+def : ARMV5TEPat<(int_arm_smulbb GPR:$a, GPR:$b),
+ (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smulbt GPR:$a, GPR:$b),
+ (SMULBT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smultb GPR:$a, GPR:$b),
+ (SMULTB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smultt GPR:$a, GPR:$b),
+ (SMULTT GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smulwb GPR:$a, GPR:$b),
+ (SMULWB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(int_arm_smulwt GPR:$a, GPR:$b),
+ (SMULWT GPR:$a, GPR:$b)>;
+
+def : ARMV5TEPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
+ (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc),
+ (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc),
+ (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc),
+ (SMLATT GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc),
+ (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : ARMV5TEPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc),
+ (SMLAWT GPR:$a, GPR:$b, GPR:$acc)>;
+
// Pre-v7 uses MCR for synchronization barriers.
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
Requires<[IsARM, HasV6]>;
Modified: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td?rev=302126&r1=302125&r2=302126&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td Thu May 4 02:31:28 2017
@@ -1993,6 +1993,10 @@ def : Thumb2DSPPat<(add rGPR:$Rn,
def : Thumb2DSPPat<(add rGPR:$Rn,
(sext_inreg (rotr rGPR:$Rm, rot_imm:$rot), i16)),
(t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_sxtb16 rGPR:$Rn),
+ (t2SXTB16 rGPR:$Rn, 0)>;
+def : Thumb2DSPPat<(int_arm_sxtab16 rGPR:$Rn, rGPR:$Rm),
+ (t2SXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
// A simple right-shift can also be used in most cases (the exception is the
@@ -2026,6 +2030,9 @@ def : Thumb2DSPPat<(and (rotr rGPR:$Rm,
def : Thumb2DSPPat<(and (rotr rGPR:$Rm, rot_imm:$rot), 0x00FF00FF),
(t2UXTB16 rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_uxtb16 rGPR:$Rm),
+ (t2UXTB16 rGPR:$Rm, 0)>;
+
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
// instead so we can include a check for masking back in the upper
@@ -2053,6 +2060,8 @@ def : Thumb2DSPPat<(add rGPR:$Rn, (and (
def : Thumb2DSPPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
0xFFFF)),
(t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2DSPPat<(int_arm_uxtab16 rGPR:$Rn, rGPR:$Rm),
+ (t2UXTAB16 rGPR:$Rn, rGPR:$Rm, 0)>;
}
@@ -2137,10 +2146,9 @@ def : T2Pat<(ARMadde rGPR:$src, t2_so
def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR),
(t2SBCrr rGPR:$src, (t2MOVi16 (imm_not_XFORM imm:$imm)))>;
-// Select Bytes -- for disassembly only
-
def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (int_arm_sel GPR:$Rn, GPR:$Rm))]>,
Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b010;
@@ -2154,9 +2162,7 @@ def t2SEL : T2ThreeReg<(outs GPR:$Rd), (
// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
// And Miscellaneous operations -- for disassembly only
class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
- list<dag> pat = [/* For disassembly only; pattern left blank */],
- dag iops = (ins rGPR:$Rn, rGPR:$Rm),
- string asm = "\t$Rd, $Rn, $Rm">
+ list<dag> pat, dag iops, string asm>
: T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
@@ -2174,60 +2180,72 @@ class T2I_pam<bits<3> op22_20, bits<4> o
let Inst{3-0} = Rm;
}
-// Saturating add/subtract -- for disassembly only
-
-def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
- [(set rGPR:$Rd, (int_arm_qadd rGPR:$Rn, rGPR:$Rm))],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
-def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
-def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
-def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd", [],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub", [],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
-def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
- [(set rGPR:$Rd, (int_arm_qsub rGPR:$Rn, rGPR:$Rm))],
- (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
-def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
-def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
-def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
-def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
-def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
-def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
-def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
-def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
-
-// Signed/Unsigned add/subtract -- for disassembly only
-
-def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
-def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
-def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
-def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
-def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
-def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
-def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
-def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
-def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
-def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
-def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
-def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
-
-// Signed/Unsigned halving add/subtract -- for disassembly only
-
-def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
-def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
-def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
-def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
-def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
-def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
-def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
-def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
-def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
-def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
-def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
-def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+class T2I_pam_intrinsics<bits<3> op22_20, bits<4> op7_4, string opc,
+ Intrinsic intrinsic>
+ : T2I_pam<op22_20, op7_4, opc,
+ [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))],
+ (ins rGPR:$Rn, rGPR:$Rm), "\t$Rd, $Rn, $Rm">;
+
+class T2I_pam_intrinsics_rev<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2I_pam<op22_20, op7_4, opc, [],
+ (ins rGPR:$Rm, rGPR:$Rn), "\t$Rd, $Rm, $Rn">;
+
+// Saturating add/subtract
+def t2QADD16 : T2I_pam_intrinsics<0b001, 0b0001, "qadd16", int_arm_qadd16>;
+def t2QADD8 : T2I_pam_intrinsics<0b000, 0b0001, "qadd8", int_arm_qadd8>;
+def t2QASX : T2I_pam_intrinsics<0b010, 0b0001, "qasx", int_arm_qasx>;
+def t2UQSUB8 : T2I_pam_intrinsics<0b100, 0b0101, "uqsub8", int_arm_uqsub8>;
+def t2QSAX : T2I_pam_intrinsics<0b110, 0b0001, "qsax", int_arm_qsax>;
+def t2QSUB16 : T2I_pam_intrinsics<0b101, 0b0001, "qsub16", int_arm_qsub16>;
+def t2QSUB8 : T2I_pam_intrinsics<0b100, 0b0001, "qsub8", int_arm_qsub8>;
+def t2UQADD16 : T2I_pam_intrinsics<0b001, 0b0101, "uqadd16", int_arm_uqadd16>;
+def t2UQADD8 : T2I_pam_intrinsics<0b000, 0b0101, "uqadd8", int_arm_uqadd8>;
+def t2UQASX : T2I_pam_intrinsics<0b010, 0b0101, "uqasx", int_arm_uqasx>;
+def t2UQSAX : T2I_pam_intrinsics<0b110, 0b0101, "uqsax", int_arm_uqsax>;
+def t2UQSUB16 : T2I_pam_intrinsics<0b101, 0b0101, "uqsub16", int_arm_uqsub16>;
+def t2QADD : T2I_pam_intrinsics_rev<0b000, 0b1000, "qadd">;
+def t2QSUB : T2I_pam_intrinsics_rev<0b000, 0b1010, "qsub">;
+def t2QDADD : T2I_pam_intrinsics_rev<0b000, 0b1001, "qdadd">;
+def t2QDSUB : T2I_pam_intrinsics_rev<0b000, 0b1011, "qdsub">;
+
+def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, rGPR:$Rn),
+ (t2QADD rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, rGPR:$Rn),
+ (t2QSUB rGPR:$Rm, rGPR:$Rn)>;
+// QDADD Rd, Rm, Rn computes SAT(Rm + SAT(Rn * 2)), so the operand that is
+// doubled by the inner qadd must be $Rn (mirrors the QDSUB pattern below).
+def : Thumb2DSPPat<(int_arm_qadd rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
+ (t2QDADD rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(int_arm_qsub rGPR:$Rm, (int_arm_qadd rGPR:$Rn, rGPR:$Rn)),
+ (t2QDSUB rGPR:$Rm, rGPR:$Rn)>;
+
+// Signed/Unsigned add/subtract
+
+def t2SASX : T2I_pam_intrinsics<0b010, 0b0000, "sasx", int_arm_sasx>;
+def t2SADD16 : T2I_pam_intrinsics<0b001, 0b0000, "sadd16", int_arm_sadd16>;
+def t2SADD8 : T2I_pam_intrinsics<0b000, 0b0000, "sadd8", int_arm_sadd8>;
+def t2SSAX : T2I_pam_intrinsics<0b110, 0b0000, "ssax", int_arm_ssax>;
+def t2SSUB16 : T2I_pam_intrinsics<0b101, 0b0000, "ssub16", int_arm_ssub16>;
+def t2SSUB8 : T2I_pam_intrinsics<0b100, 0b0000, "ssub8", int_arm_ssub8>;
+def t2UASX : T2I_pam_intrinsics<0b010, 0b0100, "uasx", int_arm_uasx>;
+def t2UADD16 : T2I_pam_intrinsics<0b001, 0b0100, "uadd16", int_arm_uadd16>;
+def t2UADD8 : T2I_pam_intrinsics<0b000, 0b0100, "uadd8", int_arm_uadd8>;
+def t2USAX : T2I_pam_intrinsics<0b110, 0b0100, "usax", int_arm_usax>;
+def t2USUB16 : T2I_pam_intrinsics<0b101, 0b0100, "usub16", int_arm_usub16>;
+def t2USUB8 : T2I_pam_intrinsics<0b100, 0b0100, "usub8", int_arm_usub8>;
+
+// Signed/Unsigned halving add/subtract
+
+def t2SHASX : T2I_pam_intrinsics<0b010, 0b0010, "shasx", int_arm_shasx>;
+def t2SHADD16 : T2I_pam_intrinsics<0b001, 0b0010, "shadd16", int_arm_shadd16>;
+def t2SHADD8 : T2I_pam_intrinsics<0b000, 0b0010, "shadd8", int_arm_shadd8>;
+def t2SHSAX : T2I_pam_intrinsics<0b110, 0b0010, "shsax", int_arm_shsax>;
+def t2SHSUB16 : T2I_pam_intrinsics<0b101, 0b0010, "shsub16", int_arm_shsub16>;
+def t2SHSUB8 : T2I_pam_intrinsics<0b100, 0b0010, "shsub8", int_arm_shsub8>;
+def t2UHASX : T2I_pam_intrinsics<0b010, 0b0110, "uhasx", int_arm_uhasx>;
+def t2UHADD16 : T2I_pam_intrinsics<0b001, 0b0110, "uhadd16", int_arm_uhadd16>;
+def t2UHADD8 : T2I_pam_intrinsics<0b000, 0b0110, "uhadd8", int_arm_uhadd8>;
+def t2UHSAX : T2I_pam_intrinsics<0b110, 0b0110, "uhsax", int_arm_uhsax>;
+def t2UHSUB16 : T2I_pam_intrinsics<0b101, 0b0110, "uhsub16", int_arm_uhsub16>;
+def t2UHSUB8 : T2I_pam_intrinsics<0b100, 0b0110, "uhsub8", int_arm_uhsub8>;
// Helper class for disassembly only
// A6.3.16 & A6.3.17
@@ -2255,16 +2273,19 @@ class T2FourReg_mac<bit long, bits<3> op
// Unsigned Sum of Absolute Differences [and Accumulate].
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
- NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (int_arm_usad8 rGPR:$Rn, rGPR:$Rm))]>,
Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
- "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (int_arm_usada8 rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>,
Requires<[IsThumb2, HasDSP]>;
// Signed/Unsigned saturate.
+let hasSideEffects = 1 in
class T2SatI<dag iops, string opc, string asm>
: T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, []> {
bits<4> Rd;
@@ -2313,10 +2334,16 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_i
let Inst{4} = 0;
}
-def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
-def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>;
def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm),
(t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>;
+def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos),
+ (t2SSAT imm1_32:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos),
+ (t2USAT imm0_31:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_ssat16 GPR:$a, imm1_16:$pos),
+ (t2SSAT16 imm1_16:$pos, GPR:$a)>;
+def : T2Pat<(int_arm_usat16 GPR:$a, imm0_15:$pos),
+ (t2USAT16 imm0_15:$pos, GPR:$a)>;
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
@@ -2689,6 +2716,18 @@ def : Thumb2DSPPat<(mul sext_16_node:$Rn
(t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
(t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulbb rGPR:$Rn, rGPR:$Rm),
+ (t2SMULBB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulbt rGPR:$Rn, rGPR:$Rm),
+ (t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smultb rGPR:$Rn, rGPR:$Rm),
+ (t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smultt rGPR:$Rn, rGPR:$Rm),
+ (t2SMULTT rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulwb rGPR:$Rn, rGPR:$Rm),
+ (t2SMULWB rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(int_arm_smulwt rGPR:$Rn, rGPR:$Rm),
+ (t2SMULWT rGPR:$Rn, rGPR:$Rm)>;
class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
list<dag> pattern>
@@ -2730,6 +2769,19 @@ def : Thumb2DSPMulPat<(add rGPR:$Ra,
(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
(t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPPat<(int_arm_smlabb GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlabt GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlatb GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlatt GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLATT GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlawb GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLAWB GPR:$a, GPR:$b, GPR:$acc)>;
+def : Thumb2DSPPat<(int_arm_smlawt GPR:$a, GPR:$b, GPR:$acc),
+ (t2SMLAWT GPR:$a, GPR:$b, GPR:$acc)>;
+
// Halfword multiple accumulate long: SMLAL<x><y>
def t2SMLALBB : T2MlaLong<0b100, 0b1000, "smlalbb">,
Requires<[IsThumb2, HasDSP]>;
@@ -2749,39 +2801,44 @@ def : Thumb2DSPPat<(ARMsmlaltb GPR:$Rn,
def : Thumb2DSPPat<(ARMsmlaltt GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
(t2SMLALTT $Rn, $Rm, $RLo, $RHi)>;
-class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
+class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc,
+ Intrinsic intrinsic>
: T2ThreeReg_mac<0, op22_20, op7_4,
(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm))]>,
Requires<[IsThumb2, HasDSP]>,
Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
let Inst{15-12} = 0b1111;
}
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">;
-def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">;
-def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">;
-def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">;
+def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad", int_arm_smuad>;
+def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx", int_arm_smuadx>;
+def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd", int_arm_smusd>;
+def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx", int_arm_smusdx>;
-class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc>
+class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc,
+ Intrinsic intrinsic>
: T2FourReg_mac<0, op22_20, op7_4,
(outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra),
- IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>,
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra",
+ [(set rGPR:$Rd, (intrinsic rGPR:$Rn, rGPR:$Rm, rGPR:$Ra))]>,
Requires<[IsThumb2, HasDSP]>;
-def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">;
-def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">;
-def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">;
-def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">;
+def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad", int_arm_smlad>;
+def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx", int_arm_smladx>;
+def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd", int_arm_smlsd>;
+def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx", int_arm_smlsdx>;
class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc>
: T2FourReg_mac<1, op22_20, op7_4,
(outs rGPR:$Ra, rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
+ RegConstraint<"$Ra = $RLo, $Rd = $RHi">,
Requires<[IsThumb2, HasDSP]>,
Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
@@ -2790,6 +2847,15 @@ def t2SMLALDX : T2DualHalfMulAddLong<0b1
def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">;
def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
+def : Thumb2DSPPat<(ARMSmlald rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLALD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+def : Thumb2DSPPat<(ARMSmlaldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLALDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+def : Thumb2DSPPat<(ARMSmlsld rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLSLD rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+def : Thumb2DSPPat<(ARMSmlsldx rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi),
+ (t2SMLSLDX rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi)>;
+
//===----------------------------------------------------------------------===//
// Division Instructions.
// Signed and unsigned division on v7-M
Added: llvm/trunk/test/CodeGen/ARM/acle-intrinsics-v5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/acle-intrinsics-v5.ll?rev=302126&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/acle-intrinsics-v5.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/acle-intrinsics-v5.ll Thu May 4 02:31:28 2017
@@ -0,0 +1,110 @@
+; RUN: llc -O1 -mtriple=armv5te-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=armv7-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv6t2-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv7em-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv8m.main-none-none-eabi -mattr=+dsp %s -o - | FileCheck %s
+define i32 @smulbb(i32 %a, i32 %b) {
+; CHECK-LABEL: smulbb
+; CHECK: smulbb r0, r0, r1
+ %tmp = call i32 @llvm.arm.smulbb(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smulbt(i32 %a, i32 %b) {
+; CHECK-LABEL: smulbt
+; CHECK: smulbt r0, r0, r1
+ %tmp = call i32 @llvm.arm.smulbt(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smultb(i32 %a, i32 %b) {
+; CHECK-LABEL: smultb
+; CHECK: smultb r0, r0, r1
+ %tmp = call i32 @llvm.arm.smultb(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smultt(i32 %a, i32 %b) {
+; CHECK-LABEL: smultt
+; CHECK: smultt r0, r0, r1
+ %tmp = call i32 @llvm.arm.smultt(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smulwb(i32 %a, i32 %b) {
+; CHECK-LABEL: smulwb
+; CHECK: smulwb r0, r0, r1
+ %tmp = call i32 @llvm.arm.smulwb(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smulwt(i32 %a, i32 %b) {
+; CHECK-LABEL: smulwt
+; CHECK: smulwt r0, r0, r1
+ %tmp = call i32 @llvm.arm.smulwt(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @acc_mults(i32 %a, i32 %b, i32 %acc) {
+; CHECK-LABEL: acc_mults
+; CHECK: smlabb r2, r0, r1, r2
+; CHECK: smlabt r2, r0, r1, r2
+; CHECK: smlatb r2, r0, r1, r2
+; CHECK: smlatt r2, r0, r1, r2
+; CHECK: smlawb r2, r0, r1, r2
+; CHECK: smlawt r0, r0, r1, r2
+ %acc1 = call i32 @llvm.arm.smlabb(i32 %a, i32 %b, i32 %acc)
+ %acc2 = call i32 @llvm.arm.smlabt(i32 %a, i32 %b, i32 %acc1)
+ %acc3 = call i32 @llvm.arm.smlatb(i32 %a, i32 %b, i32 %acc2)
+ %acc4 = call i32 @llvm.arm.smlatt(i32 %a, i32 %b, i32 %acc3)
+ %acc5 = call i32 @llvm.arm.smlawb(i32 %a, i32 %b, i32 %acc4)
+ %acc6 = call i32 @llvm.arm.smlawt(i32 %a, i32 %b, i32 %acc5)
+ ret i32 %acc6
+}
+
+define i32 @qadd(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qadd
+; CHECK: qadd r0, r0, r1
+ %tmp = call i32 @llvm.arm.qadd(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @qsub(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qsub
+; CHECK: qsub r0, r0, r1
+ %tmp = call i32 @llvm.arm.qsub(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @qdadd(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qdadd
+; The IR doubles %a (r0) and then adds %b (r1). QDADD doubles its last source
+; operand, so the source register order is deliberately left unpinned here
+; instead of requiring "r0, r1", which would compute a + 2*b.
+; CHECK: qdadd r0, {{r[01]}}, {{r[01]}}
+ %dbl = call i32 @llvm.arm.qadd(i32 %a, i32 %a)
+ %add = call i32 @llvm.arm.qadd(i32 %dbl, i32 %b)
+ ret i32 %add
+}
+
+define i32 @qdsub(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qdsub
+; CHECK: qdsub r0, r0, r1
+ %dbl = call i32 @llvm.arm.qadd(i32 %b, i32 %b)
+ %add = call i32 @llvm.arm.qsub(i32 %a, i32 %dbl)
+ ret i32 %add
+}
+
+declare i32 @llvm.arm.smulbb(i32 %a, i32 %b) nounwind readnone
+declare i32 @llvm.arm.smulbt(i32 %a, i32 %b) nounwind readnone
+declare i32 @llvm.arm.smultb(i32 %a, i32 %b) nounwind readnone
+declare i32 @llvm.arm.smultt(i32 %a, i32 %b) nounwind readnone
+declare i32 @llvm.arm.smulwb(i32 %a, i32 %b) nounwind readnone
+declare i32 @llvm.arm.smulwt(i32 %a, i32 %b) nounwind readnone
+declare i32 @llvm.arm.smlabb(i32, i32, i32) nounwind
+declare i32 @llvm.arm.smlabt(i32, i32, i32) nounwind
+declare i32 @llvm.arm.smlatb(i32, i32, i32) nounwind
+declare i32 @llvm.arm.smlatt(i32, i32, i32) nounwind
+declare i32 @llvm.arm.smlawb(i32, i32, i32) nounwind
+declare i32 @llvm.arm.smlawt(i32, i32, i32) nounwind
+declare i32 @llvm.arm.qadd(i32, i32) nounwind
+declare i32 @llvm.arm.qsub(i32, i32) nounwind
Added: llvm/trunk/test/CodeGen/ARM/acle-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/acle-intrinsics.ll?rev=302126&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/acle-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/acle-intrinsics.ll Thu May 4 02:31:28 2017
@@ -0,0 +1,481 @@
+; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=armv7-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv6t2-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv7em-none-none-eabi %s -o - | FileCheck %s
+; RUN: llc -O1 -mtriple=thumbv8m.main-none-none-eabi -mattr=+dsp %s -o - | FileCheck %s
+
+
+; upper-bound of the immediate argument
+define i32 @ssat1(i32 %a) nounwind {
+; CHECK-LABEL: ssat1
+; CHECK: ssat r0, #32, r0
+ %tmp = call i32 @llvm.arm.ssat(i32 %a, i32 32)
+ ret i32 %tmp
+}
+
+; lower-bound of the immediate argument
+define i32 @ssat2(i32 %a) nounwind {
+; CHECK-LABEL: ssat2
+; CHECK: ssat r0, #1, r0
+ %tmp = call i32 @llvm.arm.ssat(i32 %a, i32 1)
+ ret i32 %tmp
+}
+
+; upper-bound of the immediate argument
+define i32 @usat1(i32 %a) nounwind {
+; CHECK-LABEL: usat1
+; CHECK: usat r0, #31, r0
+ %tmp = call i32 @llvm.arm.usat(i32 %a, i32 31)
+ ret i32 %tmp
+}
+
+; lower-bound of the immediate argument
+define i32 @usat2(i32 %a) nounwind {
+; CHECK-LABEL: usat2
+; CHECK: usat r0, #0, r0
+ %tmp = call i32 @llvm.arm.usat(i32 %a, i32 0)
+ ret i32 %tmp
+}
+
+define i32 @ssat16 (i32 %a) nounwind {
+; CHECK-LABEL: ssat16
+; CHECK: ssat16 r0, #1, r0
+; CHECK: ssat16 r0, #16, r0
+ %tmp = call i32 @llvm.arm.ssat16(i32 %a, i32 1)
+ %tmp2 = call i32 @llvm.arm.ssat16(i32 %tmp, i32 16)
+ ret i32 %tmp2
+}
+
+define i32 @usat16(i32 %a) nounwind {
+; CHECK-LABEL: usat16
+; CHECK: usat16 r0, #0, r0
+; CHECK: usat16 r0, #15, r0
+ %tmp = call i32 @llvm.arm.usat16(i32 %a, i32 0)
+ %tmp2 = call i32 @llvm.arm.usat16(i32 %tmp, i32 15)
+ ret i32 %tmp2
+}
+
+define i32 @pack_unpack(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: pack_unpack
+; CHECK: sxtab16 r0, r0, r1
+; CHECK: sxtb16 r0, r0
+; CHECK: uxtab16 r0, r1, r0
+; CHECK: uxtb16 r0, r0
+ %tmp = call i32 @llvm.arm.sxtab16(i32 %a, i32 %b)
+ %tmp1 = call i32 @llvm.arm.sxtb16(i32 %tmp)
+ %tmp2 = call i32 @llvm.arm.uxtab16(i32 %b, i32 %tmp1)
+ %tmp3 = call i32 @llvm.arm.uxtb16(i32 %tmp2)
+ ret i32 %tmp3
+}
+
+define i32 @sel(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: sel
+; CHECK: sel r0, r0, r1
+ %tmp = call i32 @llvm.arm.sel(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @qadd8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qadd8
+; CHECK: qadd8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.qadd8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @qsub8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qsub8
+; CHECK: qsub8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.qsub8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @sadd8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: sadd8
+; CHECK: sadd8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.sadd8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @shadd8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: shadd8
+; CHECK: shadd8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.shadd8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @shsub8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: shsub8
+; CHECK: shsub8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.shsub8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @ssub8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: ssub8
+; CHECK: ssub8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.ssub8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uadd8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uadd8
+; CHECK: uadd8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uadd8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uhadd8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uhadd8
+; CHECK: uhadd8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uhadd8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uhsub8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uhsub8
+; CHECK: uhsub8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uhsub8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uqadd8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uqadd8
+; CHECK: uqadd8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uqadd8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uqsub8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uqsub8
+; CHECK: uqsub8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uqsub8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @usub8(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: usub8
+; CHECK: usub8 r0, r0, r1
+ %tmp = call i32 @llvm.arm.usub8(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @usad(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: usad
+; CHECK: usad8 r0, r0, r1
+; CHECK: usada8 r0, r0, r1, r2
+ %tmp = call i32 @llvm.arm.usad8(i32 %a, i32 %b)
+ %tmp1 = call i32 @llvm.arm.usada8(i32 %tmp, i32 %b, i32 %c)
+ ret i32 %tmp1
+}
+
+define i32 @qadd16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qadd16
+; CHECK: qadd16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.qadd16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @qasx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qasx
+; CHECK: qasx r0, r0, r1
+ %tmp = call i32 @llvm.arm.qasx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @qsax(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qsax
+; CHECK: qsax r0, r0, r1
+ %tmp = call i32 @llvm.arm.qsax(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @qsub16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: qsub16
+; CHECK: qsub16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.qsub16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @sadd16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: sadd16
+; CHECK: sadd16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.sadd16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @sasx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: sasx
+; CHECK: sasx r0, r0, r1
+ %tmp = call i32 @llvm.arm.sasx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @shadd16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: shadd16
+; CHECK: shadd16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.shadd16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @shasx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: shasx
+; CHECK: shasx r0, r0, r1
+ %tmp = call i32 @llvm.arm.shasx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @shsax(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: shsax
+; CHECK: shsax r0, r0, r1
+ %tmp = call i32 @llvm.arm.shsax(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @shsub16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: shsub16
+; CHECK: shsub16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.shsub16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @ssax(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: ssax
+; CHECK: ssax r0, r0, r1
+ %tmp = call i32 @llvm.arm.ssax(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @ssub16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: ssub16
+; CHECK: ssub16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.ssub16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uadd16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uadd16
+; CHECK: uadd16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uadd16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uasx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uasx
+; CHECK: uasx r0, r0, r1
+ %tmp = call i32 @llvm.arm.uasx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uhadd16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uhadd16
+; CHECK: uhadd16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uhadd16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uhasx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uhasx
+; CHECK: uhasx r0, r0, r1
+ %tmp = call i32 @llvm.arm.uhasx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uhsax(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uhsax
+; CHECK: uhsax r0, r0, r1
+ %tmp = call i32 @llvm.arm.uhsax(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uhsub16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uhsub16
+; CHECK: uhsub16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uhsub16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uqadd16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uqadd16
+; CHECK: uqadd16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uqadd16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uqasx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uqasx
+; CHECK: uqasx r0, r0, r1
+ %tmp = call i32 @llvm.arm.uqasx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uqsax(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uqsax
+; CHECK: uqsax r0, r0, r1
+ %tmp = call i32 @llvm.arm.uqsax(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @uqsub16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: uqsub16
+; CHECK: uqsub16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.uqsub16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @usax(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: usax
+; CHECK: usax r0, r0, r1
+ %tmp = call i32 @llvm.arm.usax(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @usub16(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: usub16
+; CHECK: usub16 r0, r0, r1
+ %tmp = call i32 @llvm.arm.usub16(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smlad(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: smlad
+; CHECK: smlad r0, r0, r1, r2
+ %tmp = call i32 @llvm.arm.smlad(i32 %a, i32 %b, i32 %c)
+ ret i32 %tmp
+}
+
+define i32 @smladx(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: smladx
+; CHECK: smladx r0, r0, r1, r2
+ %tmp = call i32 @llvm.arm.smladx(i32 %a, i32 %b, i32 %c)
+ ret i32 %tmp
+}
+
+define i64 @smlald(i32 %a, i32 %b, i64 %c) nounwind {
+; CHECK-LABEL: smlald
+; CHECK: smlald r2, r3, r0, r1
+ %tmp = call i64 @llvm.arm.smlald(i32 %a, i32 %b, i64 %c)
+ ret i64 %tmp
+}
+
+define i64 @smlaldx(i32 %a, i32 %b, i64 %c) nounwind {
+; CHECK-LABEL: smlaldx
+; CHECK: smlaldx r2, r3, r0, r1
+ %tmp = call i64 @llvm.arm.smlaldx(i32 %a, i32 %b, i64 %c)
+ ret i64 %tmp
+}
+
+define i32 @smlsd(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: smlsd
+; CHECK: smlsd r0, r0, r1, r2
+ %tmp = call i32 @llvm.arm.smlsd(i32 %a, i32 %b, i32 %c)
+ ret i32 %tmp
+}
+
+define i32 @smlsdx(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: smlsdx
+; CHECK: smlsdx r0, r0, r1, r2
+ %tmp = call i32 @llvm.arm.smlsdx(i32 %a, i32 %b, i32 %c)
+ ret i32 %tmp
+}
+
+define i64 @smlsld(i32 %a, i32 %b, i64 %c) nounwind {
+; CHECK-LABEL: smlsld
+; CHECK: smlsld r2, r3, r0, r1
+ %tmp = call i64 @llvm.arm.smlsld(i32 %a, i32 %b, i64 %c)
+ ret i64 %tmp
+}
+
+define i64 @smlsldx(i32 %a, i32 %b, i64 %c) nounwind {
+; CHECK-LABEL: smlsldx
+; CHECK: smlsldx r2, r3, r0, r1
+ %tmp = call i64 @llvm.arm.smlsldx(i32 %a, i32 %b, i64 %c)
+ ret i64 %tmp
+}
+
+define i32 @smuad(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: smuad
+; CHECK: smuad r0, r0, r1
+ %tmp = call i32 @llvm.arm.smuad(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smuadx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: smuadx
+; CHECK: smuadx r0, r0, r1
+ %tmp = call i32 @llvm.arm.smuadx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smusd(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: smusd
+; CHECK: smusd r0, r0, r1
+ %tmp = call i32 @llvm.arm.smusd(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i32 @smusdx(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: smusdx
+; CHECK: smusdx r0, r0, r1
+ %tmp = call i32 @llvm.arm.smusdx(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone
+declare i32 @llvm.arm.usat(i32, i32) nounwind readnone
+declare i32 @llvm.arm.ssat16(i32, i32) nounwind
+declare i32 @llvm.arm.usat16(i32, i32) nounwind
+declare i32 @llvm.arm.sxtab16(i32, i32)
+declare i32 @llvm.arm.sxtb16(i32)
+declare i32 @llvm.arm.uxtab16(i32, i32)
+declare i32 @llvm.arm.uxtb16(i32)
+declare i32 @llvm.arm.sel(i32, i32) nounwind
+declare i32 @llvm.arm.qadd8(i32, i32) nounwind
+declare i32 @llvm.arm.qsub8(i32, i32) nounwind
+declare i32 @llvm.arm.sadd8(i32, i32) nounwind
+declare i32 @llvm.arm.shadd8(i32, i32) nounwind
+declare i32 @llvm.arm.shsub8(i32, i32) nounwind
+declare i32 @llvm.arm.ssub8(i32, i32) nounwind
+declare i32 @llvm.arm.uadd8(i32, i32) nounwind
+declare i32 @llvm.arm.uhadd8(i32, i32) nounwind
+declare i32 @llvm.arm.uhsub8(i32, i32) nounwind
+declare i32 @llvm.arm.uqadd8(i32, i32) nounwind
+declare i32 @llvm.arm.uqsub8(i32, i32) nounwind
+declare i32 @llvm.arm.usub8(i32, i32) nounwind
+declare i32 @llvm.arm.usad8(i32, i32) nounwind readnone
+declare i32 @llvm.arm.usada8(i32, i32, i32) nounwind readnone
+declare i32 @llvm.arm.qadd16(i32, i32) nounwind
+declare i32 @llvm.arm.qasx(i32, i32) nounwind
+declare i32 @llvm.arm.qsax(i32, i32) nounwind
+declare i32 @llvm.arm.qsub16(i32, i32) nounwind
+declare i32 @llvm.arm.sadd16(i32, i32) nounwind
+declare i32 @llvm.arm.sasx(i32, i32) nounwind
+declare i32 @llvm.arm.shadd16(i32, i32) nounwind
+declare i32 @llvm.arm.shasx(i32, i32) nounwind
+declare i32 @llvm.arm.shsax(i32, i32) nounwind
+declare i32 @llvm.arm.shsub16(i32, i32) nounwind
+declare i32 @llvm.arm.ssax(i32, i32) nounwind
+declare i32 @llvm.arm.ssub16(i32, i32) nounwind
+declare i32 @llvm.arm.uadd16(i32, i32) nounwind
+declare i32 @llvm.arm.uasx(i32, i32) nounwind
+declare i32 @llvm.arm.usax(i32, i32) nounwind
+declare i32 @llvm.arm.uhadd16(i32, i32) nounwind
+declare i32 @llvm.arm.uhasx(i32, i32) nounwind
+declare i32 @llvm.arm.uhsax(i32, i32) nounwind
+declare i32 @llvm.arm.uhsub16(i32, i32) nounwind
+declare i32 @llvm.arm.uqadd16(i32, i32) nounwind
+declare i32 @llvm.arm.uqasx(i32, i32) nounwind
+declare i32 @llvm.arm.uqsax(i32, i32) nounwind
+declare i32 @llvm.arm.uqsub16(i32, i32) nounwind
+declare i32 @llvm.arm.usub16(i32, i32) nounwind
+declare i32 @llvm.arm.smlad(i32, i32, i32) nounwind
+declare i32 @llvm.arm.smladx(i32, i32, i32) nounwind
+declare i64 @llvm.arm.smlald(i32, i32, i64) nounwind
+declare i64 @llvm.arm.smlaldx(i32, i32, i64) nounwind
+declare i32 @llvm.arm.smlsd(i32, i32, i32) nounwind
+declare i32 @llvm.arm.smlsdx(i32, i32, i32) nounwind
+declare i64 @llvm.arm.smlsld(i32, i32, i64) nounwind
+declare i64 @llvm.arm.smlsldx(i32, i32, i64) nounwind
+declare i32 @llvm.arm.smuad(i32, i32) nounwind
+declare i32 @llvm.arm.smuadx(i32, i32) nounwind
+declare i32 @llvm.arm.smusd(i32, i32) nounwind
+declare i32 @llvm.arm.smusdx(i32, i32) nounwind
Removed: llvm/trunk/test/CodeGen/ARM/sat-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/sat-arith.ll?rev=302125&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/sat-arith.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/sat-arith.ll (removed)
@@ -1,63 +0,0 @@
-; RUN: llc -O1 -mtriple=armv6-none-none-eabi %s -o - | FileCheck %s -check-prefix=ARM -check-prefix=CHECK
-; RUN: llc -O1 -mtriple=thumbv7-none-none-eabi %s -o - | FileCheck %s -check-prefix=THUMB -check-prefix=CHECK
-
-; CHECK-LABEL: qadd
-define i32 @qadd() nounwind {
-; CHECK-DAG: mov{{s?}} [[R0:.*]], #8
-; CHECK-DAG: mov{{s?}} [[R1:.*]], #128
-; CHECK-ARM: qadd [[R0]], [[R1]], [[R0]]
-; CHECK-THRUMB: qadd [[R0]], [[R0]], [[R1]]
- %tmp = call i32 @llvm.arm.qadd(i32 128, i32 8)
- ret i32 %tmp
-}
-
-; CHECK-LABEL: qsub
-define i32 @qsub() nounwind {
-; CHECK-DAG: mov{{s?}} [[R0:.*]], #8
-; CHECK-DAG: mov{{s?}} [[R1:.*]], #128
-; CHECK-ARM: qsub [[R0]], [[R1]], [[R0]]
-; CHECK-THRUMB: qadd [[R0]], [[R1]], [[R0]]
- %tmp = call i32 @llvm.arm.qsub(i32 128, i32 8)
- ret i32 %tmp
-}
-
-; upper-bound of the immediate argument
-; CHECK-LABEL: ssat1
-define i32 @ssat1() nounwind {
-; CHECK: mov{{s?}} [[R0:.*]], #128
-; CHECK: ssat [[R1:.*]], #32, [[R0]]
- %tmp = call i32 @llvm.arm.ssat(i32 128, i32 32)
- ret i32 %tmp
-}
-
-; lower-bound of the immediate argument
-; CHECK-LABEL: ssat2
-define i32 @ssat2() nounwind {
-; CHECK: mov{{s?}} [[R0:.*]], #128
-; CHECK: ssat [[R1:.*]], #1, [[R0]]
- %tmp = call i32 @llvm.arm.ssat(i32 128, i32 1)
- ret i32 %tmp
-}
-
-; upper-bound of the immediate argument
-; CHECK-LABEL: usat1
-define i32 @usat1() nounwind {
-; CHECK: mov{{s?}} [[R0:.*]], #128
-; CHECK: usat [[R1:.*]], #31, [[R0]]
- %tmp = call i32 @llvm.arm.usat(i32 128, i32 31)
- ret i32 %tmp
-}
-
-; lower-bound of the immediate argument
-; CHECK-LABEL: usat2
-define i32 @usat2() nounwind {
-; CHECK: mov{{s?}} [[R0:.*]], #128
-; CHECK: usat [[R1:.*]], #0, [[R0]]
- %tmp = call i32 @llvm.arm.usat(i32 128, i32 0)
- ret i32 %tmp
-}
-
-declare i32 @llvm.arm.qadd(i32, i32) nounwind
-declare i32 @llvm.arm.qsub(i32, i32) nounwind
-declare i32 @llvm.arm.ssat(i32, i32) nounwind readnone
-declare i32 @llvm.arm.usat(i32, i32) nounwind readnone
More information about the llvm-commits mailing list