[llvm] r286036 - [Hexagon] Split all selection patterns into a separate file
Krzysztof Parzyszek via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 5 08:01:38 PDT 2016
Author: kparzysz
Date: Sat Nov 5 10:01:38 2016
New Revision: 286036
URL: http://llvm.org/viewvc/llvm-project?rev=286036&view=rev
Log:
[Hexagon] Split all selection patterns into a separate file
This is just the basic separation, without any cleanup. Further changes
will follow.
Added:
llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
Modified:
llvm/trunk/lib/Target/Hexagon/Hexagon.td
llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.td
llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV3.td
llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV4.td
llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV5.td
llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV60.td
llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoVector.td
Modified: llvm/trunk/lib/Target/Hexagon/Hexagon.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/Hexagon.td?rev=286036&r1=286035&r2=286036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/Hexagon.td (original)
+++ llvm/trunk/lib/Target/Hexagon/Hexagon.td Sat Nov 5 10:01:38 2016
@@ -250,6 +250,7 @@ include "HexagonSchedule.td"
include "HexagonRegisterInfo.td"
include "HexagonCallingConv.td"
include "HexagonInstrInfo.td"
+include "HexagonPatterns.td"
include "HexagonIntrinsics.td"
include "HexagonIntrinsicsDerived.td"
Modified: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.td?rev=286036&r1=286035&r2=286036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfo.td Sat Nov 5 10:01:38 2016
@@ -14,54 +14,6 @@
include "HexagonInstrFormats.td"
include "HexagonOperands.td"
include "HexagonInstrEnc.td"
-// Pattern fragment that combines the value type and the register class
-// into a single parameter.
-// The pat frags in the definitions below need to have a named register,
-// otherwise i32 will be assumed regardless of the register class. The
-// name of the register does not matter.
-def I1 : PatLeaf<(i1 PredRegs:$R)>;
-def I32 : PatLeaf<(i32 IntRegs:$R)>;
-def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
-def F32 : PatLeaf<(f32 IntRegs:$R)>;
-def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
-
-// Pattern fragments to extract the low and high subregisters from a
-// 64-bit value.
-def LoReg: OutPatFrag<(ops node:$Rs),
- (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
-def HiReg: OutPatFrag<(ops node:$Rs),
- (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
-
-def orisadd: PatFrag<(ops node:$Addr, node:$off),
- (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM1Imm(imm, SDLoc(N));
-}]>;
-
-// SDNode for converting immediate C to C-2.
-def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-2 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM2Imm(imm, SDLoc(N));
-}]>;
-
-// SDNode for converting immediate C to C-3.
-def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-3 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM3Imm(imm, SDLoc(N));
-}]>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- uint32_t imm = N->getZExtValue();
- return XformUToUM1Imm(imm, SDLoc(N));
-}]>;
//===----------------------------------------------------------------------===//
// Compare
@@ -96,28 +48,11 @@ def C2_cmpeqi : T_CMP <"cmp.eq", 0b00
def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10_0Ext>;
def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9_0Ext>;
-class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
- : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)),
- (MI IntRegs:$src1, ImmPred:$src2)>;
-
-def : T_CMP_pat <C2_cmpeqi, seteq, s10_0ImmPred>;
-def : T_CMP_pat <C2_cmpgti, setgt, s10_0ImmPred>;
-def : T_CMP_pat <C2_cmpgtui, setugt, u9_0ImmPred>;
-
//===----------------------------------------------------------------------===//
// ALU32/ALU +
//===----------------------------------------------------------------------===//
// Add.
-def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
-def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
-
-def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
-def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
-
let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
bit IsComm>
@@ -227,17 +162,6 @@ defm or : T_ALU32_3op_A2<"or", 0b001,
defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
-// Pats for instruction selection.
-class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
- : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
- (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
-
-def: BinOp32_pat<add, A2_add, i32>;
-def: BinOp32_pat<and, A2_and, i32>;
-def: BinOp32_pat<or, A2_or, i32>;
-def: BinOp32_pat<sub, A2_sub, i32>;
-def: BinOp32_pat<xor, A2_xor, i32>;
-
// A few special cases producing register pairs:
let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
@@ -252,9 +176,6 @@ let OutOperandList = (outs DoubleRegs:$R
def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
-def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
-def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
-
let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
: ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -282,23 +203,6 @@ let Itinerary = ALU32_3op_tc_2early_SLOT
def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
}
-// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
-// that reverse the order of the operands.
-class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
-
-// Pats for compares. They use PatFrags as operands, not SDNodes,
-// since seteq/setgt/etc. are defined as PatFrags.
-class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
- : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt))>;
-
-def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
-def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
-def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
-
-def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
-def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
-
let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in
def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
(ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
@@ -320,9 +224,6 @@ def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
let Inst{4-0} = Rd;
}
-def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
- (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
-
// Combines the two immediates into a double register.
// Increase complexity to make it greater than any complexity of a combine
// that involves a register.
@@ -332,8 +233,7 @@ let isReMaterializable = 1, isMoveImm =
AddedComplexity = 75 in
def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8_0Ext:$s8, s8_0Imm:$S8),
"$Rdd = combine(#$s8, #$S8)",
- [(set (i64 DoubleRegs:$Rdd),
- (i64 (HexagonCOMBINE(i32 s32_0ImmPred:$s8), (i32 s8_0ImmPred:$S8))))]> {
+ []> {
bits<5> Rdd;
bits<8> s8;
bits<8> S8;
@@ -418,9 +318,6 @@ multiclass Addri_base<string mnemonic, S
defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
-def: Pat<(i32 (add I32:$Rs, s32_0ImmPred:$s16)),
- (i32 (A2_addi I32:$Rs, imm:$s16))>;
-
let hasNewValue = 1, hasSideEffects = 0, isPseudo = 1 in
def A2_iconst
: ALU32_ri <(outs IntRegs:$Rd),
@@ -438,7 +335,7 @@ class T_ALU32ri_logical <string mnemonic
: ALU32_ri <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, s10_0Ext:$s10),
"$Rd = "#mnemonic#"($Rs, #$s10)" ,
- [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32_0ImmPred:$s10))]> {
+ []> {
bits<5> Rd;
bits<5> Rs;
bits<10> s10;
@@ -483,13 +380,6 @@ def A2_nop: ALU32Inst <(outs), (ins), "n
let Inst{27-24} = 0b1111;
}
-def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
- (A2_subri imm:$s10, IntRegs:$Rs)>;
-
-// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
-def: Pat<(not (i32 IntRegs:$src1)),
- (A2_subri -1, IntRegs:$src1)>;
-
let hasSideEffects = 0, hasNewValue = 1 in
class T_tfr16<bit isHi>
: ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16_0Imm:$u16),
@@ -631,7 +521,7 @@ let InputType = "imm", isExtendable = 1,
isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16_0Ext:$s16), "$Rd = #$s16",
- [(set (i32 IntRegs:$Rd), s32_0ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+ [], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel, PredRel {
bits<5> Rd;
bits<16> s16;
@@ -651,7 +541,7 @@ let isReMaterializable = 1, isMoveImm =
isAsmParserOnly = 1 in
def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8_0Imm64:$src1),
"$dst = #$src1",
- [(set (i64 DoubleRegs:$dst), s8_0Imm64Pred:$src1)]>;
+ []>;
// TODO: see if this instruction can be deleted..
let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
@@ -699,20 +589,13 @@ let opExtendable = 3 in
def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8),
"$Rd = mux($Pu, $Rs, #$s8)">;
-def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)),
- (C2_muxri I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)>;
-
-def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)),
- (C2_muxir I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)>;
-
// C2_muxii: Scalar mux immediates.
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
opExtentBits = 8, opExtendable = 2 in
def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
(ins PredRegs:$Pu, s8_0Ext:$s8, s8_0Imm:$S8),
"$Rd = mux($Pu, #$s8, #$S8)" ,
- [(set (i32 IntRegs:$Rd),
- (i32 (select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8)))] > {
+ []> {
bits<5> Rd;
bits<2> Pu;
bits<8> s8;
@@ -845,11 +728,6 @@ multiclass ZXTB_base <string mnemonic, b
defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
-def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
-def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
-def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
-def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
-
//===----------------------------------------------------------------------===//
// Template class for vector add and avg
//===----------------------------------------------------------------------===//
@@ -980,10 +858,6 @@ class T_vcmp <string Str, bits<4> minOp>
let Inst{12-8} = Rtt;
}
-class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
- : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
- (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
-
// Vector compare bytes
def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>;
def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>;
@@ -998,15 +872,6 @@ def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0
def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>;
def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>;
-def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
-def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
-def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
-def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
-def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
-def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
-def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
-def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
-
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -1112,23 +977,6 @@ let Itinerary = ALU64_tc_2_SLOT23, Defs
def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>;
}
-// Add halfword.
-def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
- (A2_addh_l16_ll I32:$src1, I32:$src2)>;
-
-def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
- (A2_addh_l16_hl I32:$src1, I32:$src2)>;
-
-def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
- (A2_addh_h16_ll I32:$src1, I32:$src2)>;
-
-// Subtract halfword.
-def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
- (A2_subh_l16_ll I32:$src1, I32:$src2)>;
-
-def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
- (A2_subh_h16_ll I32:$src1, I32:$src2)>;
-
let hasSideEffects = 0, hasNewValue = 1 in
def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
(ins DoubleRegs:$Rs, DoubleRegs:$Rt),
@@ -1168,52 +1016,6 @@ def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
-// Here, depending on the operand being selected, we'll either generate a
-// min or max instruction.
-// Ex:
-// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected
-// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'.
-// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value
-// is selected and the corresponding HexagonInst is passed in 'SwapInst'.
-
-multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT,
- InstHexagon Inst, InstHexagon SwapInst> {
- def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
- (VT RC:$src1), (VT RC:$src2)),
- (Inst RC:$src1, RC:$src2)>;
- def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
- (VT RC:$src2), (VT RC:$src1)),
- (SwapInst RC:$src1, RC:$src2)>;
-}
-
-
-multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
- defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;
-
- def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
- (i32 PositiveHalfWord:$src2))),
- (i32 PositiveHalfWord:$src1),
- (i32 PositiveHalfWord:$src2))), i16),
- (Inst IntRegs:$src1, IntRegs:$src2)>;
-
- def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
- (i32 PositiveHalfWord:$src2))),
- (i32 PositiveHalfWord:$src2),
- (i32 PositiveHalfWord:$src1))), i16),
- (SwapInst IntRegs:$src1, IntRegs:$src2)>;
-}
-
-let AddedComplexity = 200 in {
- defm: MinMax_pats<setge, A2_max, A2_min>;
- defm: MinMax_pats<setgt, A2_max, A2_min>;
- defm: MinMax_pats<setle, A2_min, A2_max>;
- defm: MinMax_pats<setlt, A2_min, A2_max>;
- defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
- defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
- defm: MinMax_pats<setule, A2_minu, A2_maxu>;
- defm: MinMax_pats<setult, A2_minu, A2_maxu>;
-}
-
class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
: ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
"$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> {
@@ -1237,16 +1039,6 @@ def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0
def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
-class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
- : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
- (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
-
-def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
-def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
-def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
-def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
-def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
-
def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
(ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
"$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
@@ -1296,9 +1088,6 @@ let isAdd = 1 in
def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
-def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
-def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
-
class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
bit IsNeg>
: T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
@@ -1308,10 +1097,6 @@ def A2_andp : T_ALU64_logical<"and", 0b0
def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
-def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
-def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
-def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
-
//===----------------------------------------------------------------------===//
// ALU64/ALU -
//===----------------------------------------------------------------------===//
@@ -1362,9 +1147,6 @@ def C2_any8 : T_LOGICAL_1OP<"any8", 0b00
def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
def C2_not : T_LOGICAL_1OP<"not", 0b10>;
-def: Pat<(i1 (not (i1 PredRegs:$Ps))),
- (C2_not PredRegs:$Ps)>;
-
let hasSideEffects = 0 in
class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
: CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt),
@@ -1390,12 +1172,6 @@ def C2_xor : T_LOGICAL_2OP<"xor", 0b010
def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
-def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
-
let hasSideEffects = 0, hasNewValue = 1 in
def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
"$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
@@ -1432,10 +1208,6 @@ def C2_mask : SInst<(outs DoubleRegs:$Rd
// JR +
//===----------------------------------------------------------------------===//
-def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
-
class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
@@ -1611,23 +1383,11 @@ multiclass JMPRpt_base<string BaseOp> {
defm J2_jumpr : JMPRpt_base<"JMPr">;
defm J2_jump : JMPpt_base<"JMP">;
-def: Pat<(br bb:$dst),
- (J2_jump brtarget:$dst)>;
-def: Pat<(retflag),
- (PS_jmpret (i32 R31))>;
-def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
- (J2_jumpt PredRegs:$src1, bb:$offset)>;
-
// A return through builtin_eh_return.
let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
def EH_RETURN_JMPR : T_JMPr;
-def: Pat<(eh_return),
- (EH_RETURN_JMPR (i32 R31))>;
-def: Pat<(brind (i32 IntRegs:$dst)),
- (J2_jumpr IntRegs:$dst)>;
-
//===----------------------------------------------------------------------===//
// JR -
//===----------------------------------------------------------------------===//
@@ -1785,45 +1545,6 @@ def L2_loadalignh_io: T_loadalign_io <"m
let accessSize = ByteAccess, opExtentBits = 11 in
def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
-// Patterns to select load-indexed (i.e. load from base+offset).
-multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
- InstHexagon MI> {
- def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
- def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
- (VT (MI AddrFI:$fi, imm:$Off))>;
- def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))),
- (VT (MI AddrFI:$fi, imm:$Off))>;
- def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
- (VT (MI IntRegs:$Rs, imm:$Off))>;
- def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
-}
-
-let AddedComplexity = 20 in {
- defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
- defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
- defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
- defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
- defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
-
- defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
- defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
- defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
- defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
- // No sextloadi1.
-}
-
-// Sign-extending loads of i1 need to replicate the lowest bit throughout
-// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
-// do the trick.
-let AddedComplexity = 20 in
-def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
- (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
-
//===----------------------------------------------------------------------===//
// Post increment load
//===----------------------------------------------------------------------===//
@@ -2697,10 +2418,6 @@ def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy
def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
-def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
-def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
-def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
-
let hasNewValue = 1, opNewValue = 0 in
class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
: MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8),
@@ -2721,12 +2438,9 @@ class T_MType_mpy_ri <bit isNeg, Operand
}
let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
-def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext,
- [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32_0ImmPred:$u8))]>;
+def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext, []>;
-def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm,
- [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
- u8_0ImmPred:$u8)))]>;
+def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm, []>;
// Assembler mapped to M2_mpyi
let isAsmParserOnly = 1 in
@@ -2742,9 +2456,7 @@ let isExtendable = 1, opExtendable = 2,
CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1,
isAsmParserOnly = 1 in
def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9_0Ext:$src2),
- "$dst = mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s32_0ImmPred:$src2))]>, ImmRegRel;
+ "$dst = mpyi($src1, #$src2)", []>, ImmRegRel;
let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
InputType = "imm" in
@@ -2793,28 +2505,16 @@ class T_MType_acc_rr <string mnemonic, b
}
let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
- def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext,
- [(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, u32_0ImmPred:$src3),
- IntRegs:$src1))]>, ImmRegRel;
-
- def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
- [(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, IntRegs:$src3),
- IntRegs:$src1))]>, ImmRegRel;
+ def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext, []>, ImmRegRel;
+
+ def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, []>, ImmRegRel;
}
let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
- def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext,
- [(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s32_0ImmPred:$src3),
- (i32 IntRegs:$src1)))]>, ImmRegRel;
-
- def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
- [(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- (i32 IntRegs:$src1)))]>, ImmRegRel;
+ def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext, []>, ImmRegRel;
+
+ def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, []>, ImmRegRel;
}
let CextOpcode = "SUB_acc" in {
@@ -2830,21 +2530,6 @@ def M2_macsin : T_MType_acc_ri <"-= mpyi
def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
-class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
- PatLeaf ImmPred>
- : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
- (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
-
-class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
- : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
- (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
-def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32_0ImmPred>;
-
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s32_0ImmPred>;
-def : T_MType_acc_pat2 <M2_nacci, add, sub>;
-
//===----------------------------------------------------------------------===//
// Template Class -- XType Vector Instructions
//===----------------------------------------------------------------------===//
@@ -3190,51 +2875,6 @@ def M2_vmac2 : T_XTYPE_mpy64_acc < "
def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
-def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
- (i64 (anyext (i32 IntRegs:$src2))))),
- (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
-
-def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))),
- (i64 (sext (i32 IntRegs:$src2))))),
- (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
-
-def: Pat<(i64 (mul (is_sext_i32:$src1),
- (is_sext_i32:$src2))),
- (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
-
-// Multiply and accumulate, use full result.
-// Rxx[+-]=mpy(Rs,Rt)
-
-def: Pat<(i64 (add (i64 DoubleRegs:$src1),
- (mul (i64 (sext (i32 IntRegs:$src2))),
- (i64 (sext (i32 IntRegs:$src3)))))),
- (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
- (mul (i64 (sext (i32 IntRegs:$src2))),
- (i64 (sext (i32 IntRegs:$src3)))))),
- (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (add (i64 DoubleRegs:$src1),
- (mul (i64 (anyext (i32 IntRegs:$src2))),
- (i64 (anyext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (add (i64 DoubleRegs:$src1),
- (mul (i64 (zext (i32 IntRegs:$src2))),
- (i64 (zext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
- (mul (i64 (anyext (i32 IntRegs:$src2))),
- (i64 (anyext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
- (mul (i64 (zext (i32 IntRegs:$src2))),
- (i64 (zext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
//===----------------------------------------------------------------------===//
// MTYPE/MPYH -
//===----------------------------------------------------------------------===//
@@ -3376,16 +3016,6 @@ defm storerd: ST_PostInc <"memd", "STrid
let accessSize = HalfWordAccess, isNVStorable = 0 in
defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
-class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
- InstHexagon MI>
- : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
- (MI I32:$src2, imm:$offset, Value:$src1)>;
-
-def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
-def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
-def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
-def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
-
//===----------------------------------------------------------------------===//
// Template class for post increment stores with register offset.
//===----------------------------------------------------------------------===//
@@ -3536,116 +3166,6 @@ let addrMode = BaseImmOffset, InputType
u6_1Ext, 0b011, 1>;
}
-// Patterns for generating stores, where the address takes different forms:
-// - frameindex,
-// - frameindex + offset,
-// - base + offset,
-// - simple (base address without offset).
-// These would usually be used together (via Storex_pat defined below), but
-// in some cases one may want to apply different properties (such as
-// AddedComplexity) to the individual patterns.
-class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
-multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- InstHexagon MI> {
- def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
- def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
-}
-multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- InstHexagon MI> {
- def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
- def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
-}
-class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
- (MI IntRegs:$Rs, 0, Value:$Rt)>;
-
-// Patterns for generating stores, where the address takes different forms,
-// and where the value being stored is transformed through the value modifier
-// ValueMod. The address forms are same as above.
-class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(Store Value:$Rs, AddrFI:$fi),
- (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
-multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- PatFrag ValueMod, InstHexagon MI> {
- def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
- def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
-}
-multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- PatFrag ValueMod, InstHexagon MI> {
- def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
- def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
-}
-class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
- (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
-
-multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
- InstHexagon MI> {
- def: Storex_fi_pat <Store, Value, MI>;
- defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
- defm: Storex_add_pat <Store, Value, ImmPred, MI>;
-}
-
-multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
- PatFrag ValueMod, InstHexagon MI> {
- def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
- defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
- defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
-}
-
-// Regular stores in the DAG have two operands: value and address.
-// Atomic stores also have two, but they are reversed: address, value.
-// To use atomic stores with the patterns, they need to have their operands
-// swapped. This relies on the knowledge that the F.Fragment uses names
-// "ptr" and "val".
-class SwapSt<PatFrag F>
- : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
- F.OperandTransform>;
-
-let AddedComplexity = 20 in {
- defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
- defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
- defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
- defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
-
- defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
- defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
- defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
- defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
-}
-
-// Simple patterns should be tried with the least priority.
-def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
-def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
-def: Storex_simple_pat<store, I32, S2_storeri_io>;
-def: Storex_simple_pat<store, I64, S2_storerd_io>;
-
-def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
-def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
-def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
-def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
-
-let AddedComplexity = 20 in {
- defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
- defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
- defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
-}
-
-def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
-def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
-def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
-
// Store predicate.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
@@ -3952,8 +3472,6 @@ let isReMaterializable = 1, isAsCheapAsA
// Sign extend word to doubleword
def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
-def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
-
// Vector saturate and pack
let Defs = [USR_OVF] in {
def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>;
@@ -4002,17 +3520,6 @@ let Itinerary = S_2op_tc_2_SLOT23 in {
def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>;
}
-def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)),
- (i32 (sub 0, (i32 IntRegs:$src))),
- (i32 IntRegs:$src))),
- (A2_abs IntRegs:$src)>;
-
-let AddedComplexity = 50 in
-def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)),
- (i32 IntRegs:$src)),
- (sra (i32 IntRegs:$src), (i32 31)))),
- (A2_abs IntRegs:$src)>;
-
class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp,
bit isSat, bit isRnd, list<dag> pattern = []>
@@ -4050,9 +3557,7 @@ class T_S2op_2_ii <string mnemonic, bits
isSat, isRnd, pattern>;
class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
- : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0,
- [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src),
- (u5_0ImmPred:$u5)))]>;
+ : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, []>;
// Vector arithmetic shift right by immediate with truncate and pack
def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>;
@@ -4081,11 +3586,6 @@ let isAsmParserOnly = 1 in
def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
"$dst = not($src)">;
-def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5_0ImmPred:$src2)),
- (i32 1))),
- (i32 1))),
- (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>;
-
class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
: SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
"$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> {
@@ -4125,9 +3625,6 @@ def A2_vabshsat : T_S2op_3 <"vabsh", 0b0
def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
-def : Pat<(not (i64 DoubleRegs:$src1)),
- (A2_notp DoubleRegs:$src1)>;
-
//===----------------------------------------------------------------------===//
// STYPE/BIT +
//===----------------------------------------------------------------------===//
@@ -4167,20 +3664,6 @@ def S2_clb : T_COUNT_LEADING_32<"clb
def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
-// Count leading zeros.
-def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
-def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
-
-// Count trailing zeros: 32-bit.
-def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
-
-// Count leading ones.
-def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
-def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
-
-// Count trailing ones: 32-bit.
-def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
-
// The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td.
// Bit set/clear/toggle
@@ -4223,19 +3706,6 @@ def S2_clrbit_r : T_SCT_BIT_REG<"clrb
def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
-def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5_0ImmPred:$u5)))),
- (S2_clrbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
-def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))),
- (S2_setbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
-def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))),
- (S2_togglebit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
-def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))),
- (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
- (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
- (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
-
// Bit test
let hasSideEffects = 0 in
@@ -4274,17 +3744,6 @@ class T_TEST_BIT_REG<string MnOp, bit Is
def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
-let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
- def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
- (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
- def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
- (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
- def: Pat<(i1 (trunc (i32 IntRegs:$Rs))),
- (S2_tstbit_i IntRegs:$Rs, 0)>;
- def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
- (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
-}
-
let hasSideEffects = 0 in
class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6_0Imm:$u6),
@@ -4323,17 +3782,6 @@ def C2_bitsclri : T_TEST_BITS_IMM<"bitsc
def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
-let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
- def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6_0ImmPred:$u6), 0)),
- (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>;
- def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)),
- (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
-}
-
-let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
-def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
- (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
-
//===----------------------------------------------------------------------===//
// STYPE/BIT -
//===----------------------------------------------------------------------===//
@@ -4349,14 +3797,6 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$R
// XTYPE/PERM +
//===----------------------------------------------------------------------===//
-def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
- (i32 8)),
- (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
- (i32 16)),
- (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
- (zextloadi8 (i32 IntRegs:$b))),
- (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
-
//===----------------------------------------------------------------------===//
// XTYPE/PERM -
//===----------------------------------------------------------------------===//
@@ -4396,24 +3836,6 @@ let hasSideEffects = 0, isCodeGenOnly =
def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
"$dst = $src">;
-
-// Patterns for loads of i1:
-def: Pat<(i1 (load AddrFI:$fi)),
- (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
-def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32_0ImmPred:$Off))),
- (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
-def: Pat<(i1 (load (i32 IntRegs:$Rs))),
- (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
-
-def I1toI32: OutPatFrag<(ops node:$Rs),
- (C2_muxii (i1 $Rs), 1, 0)>;
-
-def I32toI1: OutPatFrag<(ops node:$Rs),
- (i1 (C2_tfrrp (i32 $Rs)))>;
-
-defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
-def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
-
//===----------------------------------------------------------------------===//
// STYPE/PRED -
//===----------------------------------------------------------------------===//
@@ -4437,9 +3859,7 @@ class S_2OpInstImm<string Mnemonic, bits
}
class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp>
- : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6_0Imm,
- [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
- u6_0ImmPred:$src2))]> {
+ : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6_0Imm, []> {
bits<6> src2;
let Inst{13-8} = src2;
}
@@ -4453,9 +3873,7 @@ def S2_lsr_i_p : S_2OpInstImmI6<"lsr", s
let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in
def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
(ins IntRegs:$Rt, IntRegs:$Rs, u3_0Imm:$u3),
- "$Rd = addasl($Rt, $Rs, #$u3)" ,
- [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt),
- (shl (i32 IntRegs:$Rs), u3_0ImmPred:$u3)))],
+ "$Rd = addasl($Rt, $Rs, #$u3)" , [],
"", S_3op_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rt;
@@ -4497,12 +3915,8 @@ def S2_addasl_rrri: SInst <(outs IntRegs
//===----------------------------------------------------------------------===//
// SYSTEM/USER +
//===----------------------------------------------------------------------===//
-def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
-
let hasSideEffects = 1, isSoloAX = 1 in
-def Y2_barrier : SYSInst<(outs), (ins),
- "barrier",
- [(HexagonBARRIER)],"",ST_tc_st_SLOT0> {
+def Y2_barrier : SYSInst<(outs), (ins), "barrier", [],"",ST_tc_st_SLOT0> {
let Inst{31-28} = 0b1010;
let Inst{27-21} = 0b1000000;
}
@@ -4524,9 +3938,6 @@ let isMoveImm = 1, isAsCheapAsAMove = 1,
(ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">;
}
-def: Pat<(orisadd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
- (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>;
-
//===----------------------------------------------------------------------===//
// CRUSER - Type.
//===----------------------------------------------------------------------===//
@@ -4734,14 +4145,6 @@ def Y4_trace: CRInst <(outs), (ins IntRe
let Inst{20-16} = Rs;
}
-// Support for generating global address.
-// Taken from X86InstrInfo.td.
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, i32>,
- SDTCisPtrTy<0>]>;
-def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
-def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
-
// HI/LO Instructions
let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
hasNewValue = 1, opNewValue = 0 in
@@ -4768,52 +4171,26 @@ let isAsmParserOnly = 1 in {
let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in {
def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v),
- "$Rd = CONST32(#$v)", [(set I32:$Rd, imm:$v)]>;
+ "$Rd = CONST32(#$v)", []>;
def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v),
- "$Rd = CONST64(#$v)", [(set I64:$Rd, imm:$v)]>;
+ "$Rd = CONST64(#$v)", []>;
}
-// Map TLS addresses to A2_tfrsi.
-def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
-def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
-
let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
isCodeGenOnly = 1 in
-def PS_true : SInst<(outs PredRegs:$dst), (ins), "",
- [(set (i1 PredRegs:$dst), 1)]>;
+def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>;
let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
isCodeGenOnly = 1 in
-def PS_false : SInst<(outs PredRegs:$dst), (ins), "",
- [(set (i1 PredRegs:$dst), 0)]>;
-
-// Pseudo instructions.
-def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
- SDTCisVT<1, i32> ]>;
-
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-
-// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain,
-// Optional Flag and Variable Arguments.
-// Its 1 Operand has pointer type.
-def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>;
let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- ".error \"should not emit\" ",
- [(callseq_start timm:$amt)]>;
+ ".error \"should not emit\" ", []>;
let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- ".error \"should not emit\" ",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
+ ".error \"should not emit\" ", []>;
// Call subroutine indirectly.
let Defs = VolatileV3.Regs in
@@ -4829,264 +4206,15 @@ let isPseudo = 1, isCall = 1, isReturn =
isTerminator = 1, isCodeGenOnly = 1 in
def PS_tailcall_i : JInst<(outs), (ins calltarget:$dst), "", []>;
-//Tail calls.
-def: Pat<(HexagonTCRet tglobaladdr:$dst),
- (PS_tailcall_i tglobaladdr:$dst)>;
-def: Pat<(HexagonTCRet texternalsym:$dst),
- (PS_tailcall_i texternalsym:$dst)>;
-def: Pat<(HexagonTCRet I32:$dst),
- (PS_tailcall_r I32:$dst)>;
-
-// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
-def: Pat<(and (i32 IntRegs:$src1), 65535),
- (A2_zxth IntRegs:$src1)>;
-
-// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
-def: Pat<(and (i32 IntRegs:$src1), 255),
- (A2_zxtb IntRegs:$src1)>;
-
-// Map Add(p1, true) to p1 = not(p1).
-// Add(p1, false) should never be produced;
-// if it is, it has to be mapped to a NOOP.
-def: Pat<(add (i1 PredRegs:$src1), -1),
- (C2_not PredRegs:$src1)>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def: Pat<(select (not (i1 PredRegs:$src1)), s8_0ImmPred:$src2, s32_0ImmPred:$src3),
- (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = C2_muxir(p0, r1, #i)
-def: Pat<(select (not (i1 PredRegs:$src1)), s32_0ImmPred:$src2,
- (i32 IntRegs:$src3)),
- (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = C2_muxri (p0, #i, r1)
-def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32_0ImmPred:$src3),
- (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>;
-
-// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
-def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
- (J2_jumpf PredRegs:$src1, bb:$offset)>;
-
-// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
-def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
- (A2_sxtw (LoReg DoubleRegs:$src1))>;
-
-// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
-def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
- (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
-
-// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
-def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
- (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
-
-// We want to prevent emitting pnot's as much as possible.
-// Map brcond with an unsupported setcc to a J2_jumpf.
-def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10_0ImmPred:$src2)),
- bb:$offset),
- (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10_0ImmPred:$src2), bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
- (J2_jumpf PredRegs:$src1, bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
- (J2_jumpt PredRegs:$src1, bb:$offset)>;
-
-// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
-def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8_0ImmPred:$src2)), bb:$offset),
- (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8_0ImmPred:$src2)),
- bb:$offset)>;
-
-// Map from a 64-bit select to an emulated 64-bit mux.
-// Hexagon does not support 64-bit MUXes; so emulate with combines.
-def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src3)),
- (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
- (HiReg DoubleRegs:$src3)),
- (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
- (LoReg DoubleRegs:$src3)))>;
-
-// Map from a 1-bit select to logical ops.
-// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
-def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
- (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
- (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
-
-// Map for truncating from 64-bit register values to 32-bit values.
-def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
- (LoReg DoubleRegs:$src)>;
-
-// Map for truncating from i64 immediates to i1 bit immediates.
-def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
- (C2_tfrrp (LoReg DoubleRegs:$src))>;
-
-// rs <= rt -> !(rs > rt).
-let AddedComplexity = 30 in
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>;
-
-// rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
-
-// Rss <= Rtt -> !(Rss > Rtt).
-def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Map cmpne -> cmpeq.
-// Hexagon_TODO: We should improve on this.
-// rs != rt -> !(rs == rt).
-let AddedComplexity = 30 in
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
- (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>;
-
-// Convert setne back to xor for hexagon since we compute w/ pred registers.
-def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
- (C2_xor PredRegs:$src1, PredRegs:$src2)>;
-
-// Map cmpne(Rss) -> !cmpeq(Rss).
-// rs != rt -> !(rs == rt).
-def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
-// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
-
-// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
-let AddedComplexity = 30 in
-def: Pat<(i1 (setge (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
- (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>;
-
-// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
-// rss >= rtt -> !(rtt > rss).
-def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
-
-// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
-// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
-// rs < rt -> !(rs >= rt).
-let AddedComplexity = 30 in
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1,
- (DEC_CONST_SIGNED s32_0ImmPred:$src2)))>;
-
-// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
-def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
- (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
-
-// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
-def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32_0ImmPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32_0ImmPred:$src2))>;
-
-// Generate cmpgtu(Rs, #u9)
-def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32_0ImmPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>;
-
-// Map from Rs >= Rt -> !(Rt > Rs).
-// rs >= rt -> !(rt > rs).
-def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
-
-// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
-// Map from (Rs <= Rt) -> !(Rs > Rt).
-def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Sign extends.
-// i1 -> i32
-def: Pat<(i32 (sext (i1 PredRegs:$src1))),
- (C2_muxii PredRegs:$src1, -1, 0)>;
-
-// i1 -> i64
-def: Pat<(i64 (sext (i1 PredRegs:$src1))),
- (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
-
-// Zero extends.
-// i1 -> i32
-def: Pat<(i32 (zext (i1 PredRegs:$src1))),
- (C2_muxii PredRegs:$src1, 1, 0)>;
-
-// Map from Rs = Pd to Rs = mux(Pd, #1, #0)
-def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
- (C2_muxii PredRegs:$src1, 1, 0)>;
-
-// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
-def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
- (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
-
-// Clear the sign bit in a 64-bit register.
-def ClearSign : OutPatFrag<(ops node:$Rss),
- (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>;
-
-def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
- (A2_addp
- (M2_dpmpyuu_acc_s0
- (S2_lsr_i_p
- (A2_addp
- (M2_dpmpyuu_acc_s0
- (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
- (HiReg $Rss),
- (LoReg $Rtt)),
- (A2_combinew (A2_tfrsi 0),
- (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
- 32),
- (HiReg $Rss),
- (HiReg $Rtt)),
- (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;
-
-// Multiply 64-bit unsigned and use upper result.
-def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>;
-
-// Multiply 64-bit signed and use upper result.
-//
-// For two signed 64-bit integers A and B, let A' and B' denote A and B
-// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the
-// sign bit of A (and identically for B). With this notation, the signed
-// product A*B can be written as:
-// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
-// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
-// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
-// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
-
-def : Pat <(mulhs I64:$Rss, I64:$Rtt),
- (A2_subp
- (MulHU $Rss, $Rtt),
- (A2_addp
- (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
- (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
-
-// Hexagon specific ISD nodes.
-def SDTHexagonALLOCA : SDTypeProfile<1, 2,
- [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
- [SDNPHasChain]>;
-
// The reason for the custom inserter is to record all ALLOCA instructions
// in MachineFunctionInfo.
let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in
def PS_alloca: ALU32Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u32_0Imm:$A), "",
- [(set (i32 IntRegs:$Rd),
- (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>;
+ (ins IntRegs:$Rs, u32_0Imm:$A), "", []>;
let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
def PS_aligna : ALU32Inst<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>;
-def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
-def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
-
-def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16_0Ext:$dst)>;
-def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16_0Ext:$dst)>;
-
// XTYPE/SHIFT
//
//===----------------------------------------------------------------------===//
@@ -5103,10 +4231,7 @@ class T_shift_imm_acc_r <string opc1, st
SDNode OpNode2, bits<3> majOp, bits<2> minOp>
: SInst_acc<(outs IntRegs:$Rx),
(ins IntRegs:$src1, IntRegs:$Rs, u5_0Imm:$u5),
- "$Rx "#opc2#opc1#"($Rs, #$u5)",
- [(set (i32 IntRegs:$Rx),
- (OpNode2 (i32 IntRegs:$src1),
- (OpNode1 (i32 IntRegs:$Rs), u5_0ImmPred:$u5)))],
+ "$Rx "#opc2#opc1#"($Rs, #$u5)", [],
"$src1 = $Rx", S_2op_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Rs;
@@ -5133,10 +4258,7 @@ class T_shift_reg_acc_r <string opc1, st
SDNode OpNode2, bits<2> majOp, bits<2> minOp>
: SInst_acc<(outs IntRegs:$Rx),
(ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#opc2#opc1#"($Rs, $Rt)",
- [(set (i32 IntRegs:$Rx),
- (OpNode2 (i32 IntRegs:$src1),
- (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))],
+ "$Rx "#opc2#opc1#"($Rs, $Rt)", [],
"$src1 = $Rx", S_3op_tc_2_SLOT23 > {
bits<5> Rx;
bits<5> Rs;
@@ -5160,10 +4282,7 @@ class T_shift_imm_acc_p <string opc1, st
SDNode OpNode2, bits<3> majOp, bits<2> minOp>
: SInst_acc<(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$src1, DoubleRegs:$Rss, u6_0Imm:$u6),
- "$Rxx "#opc2#opc1#"($Rss, #$u6)",
- [(set (i64 DoubleRegs:$Rxx),
- (OpNode2 (i64 DoubleRegs:$src1),
- (OpNode1 (i64 DoubleRegs:$Rss), u6_0ImmPred:$u6)))],
+ "$Rxx "#opc2#opc1#"($Rss, #$u6)", [],
"$src1 = $Rxx", S_2op_tc_2_SLOT23> {
bits<5> Rxx;
bits<5> Rss;
@@ -5190,10 +4309,7 @@ class T_shift_reg_acc_p <string opc1, st
SDNode OpNode2, bits<3> majOp, bits<2> minOp>
: SInst_acc<(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rxx "#opc2#opc1#"($Rss, $Rt)",
- [(set (i64 DoubleRegs:$Rxx),
- (OpNode2 (i64 DoubleRegs:$src1),
- (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))],
+ "$Rxx "#opc2#opc1#"($Rss, $Rt)", [],
"$src1 = $Rxx", S_3op_tc_2_SLOT23> {
bits<5> Rxx;
bits<5> Rss;
@@ -5365,9 +4481,7 @@ class T_S3op_3 <string mnemonic, Registe
let hasNewValue = 1 in
class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0,
- [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+ : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, []>;
let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in
class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp>
@@ -5375,9 +4489,7 @@ class T_S3op_shift32_Sat <string mnemoni
class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0,
- [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+ : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, []>;
class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
@@ -5523,35 +4635,6 @@ def S2_insertp_rp : T_S3op_insert<"inser
def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6_0Imm>;
-def SDTHexagonINSERT:
- SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
-def SDTHexagonINSERTRP:
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisVT<3, i64>]>;
-
-def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
-def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
-
-def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
- (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>;
-def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
- (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>;
-def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
- (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
-def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
- (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
-
-let AddedComplexity = 100 in
-def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
- (i32 (extloadi8 (add I32:$b, 3))),
- 24, 8),
- (i32 16)),
- (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
- (zextloadi8 I32:$b)),
- (A2_swiz (L2_loadri_io I32:$b, 0))>;
-
-
//===----------------------------------------------------------------------===//
// Template class for 'extract bitfield' instructions
//===----------------------------------------------------------------------===//
@@ -5618,29 +4701,6 @@ let hasNewValue = 1 in {
def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5_0Imm>;
}
-def SDTHexagonEXTRACTU:
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDTHexagonEXTRACTURP:
- SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i64>]>;
-
-def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
-def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
-
-def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
- (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>;
-def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
- (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>;
-def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
- (S2_extractu_rp I32:$src1, I64:$src2)>;
-def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
- (S2_extractup_rp I64:$src1, I64:$src2)>;
-
-// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
-def: Pat<(mul (i32 IntRegs:$src1), (ineg n8_0ImmPred:$src2)),
- (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>;
-
//===----------------------------------------------------------------------===//
// :raw form of tableindx[bdhw] insns
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV3.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV3.td?rev=286036&r1=286035&r2=286036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV3.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV3.td Sat Nov 5 10:01:38 2016
@@ -11,12 +11,6 @@
//
//===----------------------------------------------------------------------===//
-def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
-
-def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
-
//===----------------------------------------------------------------------===//
// J +
//===----------------------------------------------------------------------===//
@@ -107,9 +101,7 @@ def A2_addsph : T_ALU64_addsp_hl<":raw:h
let hasSideEffects = 0, isAsmParserOnly = 1 in
def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)",
- [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))),
- (i64 DoubleRegs:$Rt))))],
+ (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", [],
"", ALU64_tc_1_SLOT23>;
@@ -139,60 +131,10 @@ def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
-multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
- defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
-}
-
-let AddedComplexity = 200 in {
- defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
- defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
- defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
- defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
- defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
- defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
- defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
- defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
-}
-
//===----------------------------------------------------------------------===//
// ALU64/ALU -
//===----------------------------------------------------------------------===//
-
-
-
-//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
-
-// Map call instruction
-def : Pat<(callv3 I32:$dst),
- (J2_callr I32:$dst)>;
-def : Pat<(callv3 tglobaladdr:$dst),
- (J2_call tglobaladdr:$dst)>;
-def : Pat<(callv3 texternalsym:$dst),
- (J2_call texternalsym:$dst)>;
-def : Pat<(callv3 tglobaltlsaddr:$dst),
- (J2_call tglobaltlsaddr:$dst)>;
-
-def : Pat<(callv3nr I32:$dst),
- (PS_callr_nr I32:$dst)>;
-def : Pat<(callv3nr tglobaladdr:$dst),
- (PS_call_nr tglobaladdr:$dst)>;
-def : Pat<(callv3nr texternalsym:$dst),
- (PS_call_nr texternalsym:$dst)>;
-
//===----------------------------------------------------------------------===//
// :raw form of vrcmpys:hi/lo insns
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV4.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV4.td?rev=286036&r1=286035&r2=286036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV4.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV4.td Sat Nov 5 10:01:38 2016
@@ -30,9 +30,6 @@ def DuplexIClassD: InstDuplex < 0xD >;
def DuplexIClassE: InstDuplex < 0xE >;
def DuplexIClassF: InstDuplex < 0xF >;
-def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
-def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
-
let hasSideEffects = 0 in
class T_Immext<Operand ImmType>
: EXTENDERInst<(outs), (ins ImmType:$imm),
@@ -53,14 +50,6 @@ let isCodeGenOnly = 1 in {
def A4_ext_g : T_Immext<globaladdress>;
}
-def BITPOS32 : SDNodeXForm<imm, [{
- // Return the bit position we will set [0-31].
- // As an SDNode.
- int32_t imm = N->getSExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
// compiler)
@@ -145,24 +134,6 @@ def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.e
def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
-// Pats for instruction selection.
-
-// A class to embed the usual comparison patfrags within a zext to i32.
-// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same
-// names, or else the frag's "body" won't match the operands.
-class CmpInReg<PatFrag Op>
- : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>;
-
-def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
-def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
-
-def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
-def: T_cmp32_rr_pat<C4_cmplte, setle, i1>;
-def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
-
-def: T_cmp32_rr_pat<C4_cmplte, RevCmp<setge>, i1>;
-def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
-
class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
@@ -192,21 +163,6 @@ def A4_cmpheq : T_CMP_rrbh<"cmph.eq",
def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
-let AddedComplexity = 100 in {
- def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 255), 0)),
- (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
- def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 255), 0)),
- (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
- def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 65535), 0)),
- (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
- def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 65535), 0)),
- (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
-}
-
class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits>
: ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
@@ -272,16 +228,6 @@ class T_RCMP_EQ_ri<string mnemonic, bit
def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))),
- (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>;
-def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))),
- (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>;
-
-// Preserve the S2_tstbit_r generation
-def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
- (i32 IntRegs:$src1))), 0)))),
- (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
-
//===----------------------------------------------------------------------===//
// ALU32 -
//===----------------------------------------------------------------------===//
@@ -317,16 +263,6 @@ let opExtendable = 1 in
def A4_combineir : T_Combine1<0b01, (ins s8_0Ext:$s8, IntRegs:$Rs),
"$Rdd = combine(#$s8, $Rs)">;
-// The complexity of the combines involving immediates should be greater
-// than the complexity of the combine with two registers.
-let AddedComplexity = 50 in {
-def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i),
- (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>;
-
-def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r),
- (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>;
-}
-
// A4_combineii: Set two small immediates.
let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8_0Imm:$s8, u6_0Ext:$U6),
@@ -343,12 +279,6 @@ def A4_combineii: ALU32Inst<(outs Double
let Inst{4-0} = Rdd;
}
-// The complexity of the combine with two immediates should be greater than
-// the complexity of a combine involving a register.
-let AddedComplexity = 75 in
-def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6),
- (A4_combineii imm:$s8, imm:$u6)>;
-
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -357,39 +287,6 @@ def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8
// LD +
//===----------------------------------------------------------------------===//
-def Zext64: OutPatFrag<(ops node:$Rs),
- (i64 (A4_combineir 0, (i32 $Rs)))>;
-def Sext64: OutPatFrag<(ops node:$Rs),
- (i64 (A2_sxtw (i32 $Rs)))>;
-
-// Patterns to generate indexed loads with different forms of the address:
-// - frameindex,
-// - base + offset,
-// - base (without offset).
-multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
- PatLeaf ImmPred, InstHexagon MI> {
- def: Pat<(VT (Load AddrFI:$fi)),
- (VT (ValueMod (MI AddrFI:$fi, 0)))>;
- def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
- (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
- def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
- (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
- def: Pat<(VT (Load (i32 IntRegs:$Rs))),
- (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
-}
-
-defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
-defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
-defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
-defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
-
-// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
-def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
-
//===----------------------------------------------------------------------===//
// Template class for load instructions with Absolute set addressing mode.
//===----------------------------------------------------------------------===//
@@ -497,42 +394,6 @@ let accessSize = DoubleWordAccess in
def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>;
-multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
- def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3)))),
- (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>;
- def : Pat <(VT (ldOp (add IntRegs:$src1,
- (HexagonCONST32 tglobaladdr:$src2)))),
- (MI IntRegs:$src1, 0, tglobaladdr:$src2)>;
-
- def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
- (HexagonCONST32 tconstpool:$src3)))),
- (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>;
- def : Pat <(VT (ldOp (add IntRegs:$src1,
- (HexagonCONST32 tconstpool:$src2)))),
- (MI IntRegs:$src1, 0, tconstpool:$src2)>;
-
- def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
- (HexagonCONST32 tjumptable:$src3)))),
- (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>;
- def : Pat <(VT (ldOp (add IntRegs:$src1,
- (HexagonCONST32 tjumptable:$src2)))),
- (MI IntRegs:$src1, 0, tjumptable:$src2)>;
-}
-
-let AddedComplexity = 60 in {
-defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
-defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
-defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;
-
-defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
-defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
-defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;
-
-defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
-defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
-}
-
//===----------------------------------------------------------------------===//
// Template classes for the non-predicated load instructions with
// base + register offset addressing mode
@@ -630,50 +491,6 @@ defm loadri : ld_idxd_shl<"memw", "LDriw
let accessSize = DoubleWordAccess in
defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
-// 'def pats' for load instructions with base + register offset and non-zero
-// immediate value. Immediate value is used to left-shift the second
-// register operand.
-class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
- : Pat<(VT (Load (add (i32 IntRegs:$Rs),
- (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2))))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
-
-let AddedComplexity = 40 in {
- def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxs_pat<load, i32, L4_loadri_rr>;
- def: Loadxs_pat<load, i64, L4_loadrd_rr>;
-}
-
-// 'def pats' for load instruction base + register offset and
-// zero immediate value.
-class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
- : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
-
-let AddedComplexity = 20 in {
- def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
- def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
-}
-
-// zext i1->i64
-def: Pat<(i64 (zext (i1 PredRegs:$src1))),
- (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>;
-
-// zext i32->i64
-def: Pat<(i64 (zext (i32 IntRegs:$src1))),
- (Zext64 IntRegs:$src1)>;
-
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -796,29 +613,6 @@ def S4_storeri_ur : T_StoreAbsReg <"memw
def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110,
DoubleWordAccess>;
-let AddedComplexity = 40 in
-multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
- PatFrag stOp> {
- def : Pat<(stOp (VT RC:$src4),
- (add (shl (i32 IntRegs:$src1), u2_0ImmPred:$src2),
- u32_0ImmPred:$src3)),
- (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>;
-
- def : Pat<(stOp (VT RC:$src4),
- (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3))),
- (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
-
- def : Pat<(stOp (VT RC:$src4),
- (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))),
- (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
-}
-
-defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
-defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
-defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
-defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
-
let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset,
opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in
class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp,
@@ -1037,54 +831,6 @@ let addrMode = BaseRegOffset, InputType
defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
}
-class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs),
- (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2)))),
- (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
-
-let AddedComplexity = 40 in {
- def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexs_pat<store, I32, S4_storeri_rr>;
- def: Storexs_pat<store, I64, S4_storerd_rr>;
-}
-
-def s30_2ProperPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v);
-}]>;
-def RoundTo8 : SDNodeXForm<imm, [{
- int32_t Imm = N->getSExtValue();
- return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32);
-}]>;
-
-let AddedComplexity = 40 in
-def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)),
- (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>;
-
-class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
- (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;
-
-let AddedComplexity = 20 in {
- def: Store_rr_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Store_rr_pat<store, I32, S4_storeri_rr>;
- def: Store_rr_pat<store, I64, S4_storerd_rr>;
-}
-
-
-// memd(Rx++#s4:3)=Rtt
-// memd(Rx++#s4:3:circ(Mu))=Rtt
-// memd(Rx++I:circ(Mu))=Rtt
-// memd(Rx++Mu)=Rtt
-// memd(Rx++Mu:brev)=Rtt
-// memd(gp+#u16:3)=Rtt
-
-// Store doubleword conditionally.
-// if ([!]Pv[.new]) memd(#u6)=Rtt
-// TODO: needs to be implemented.
-
//===----------------------------------------------------------------------===//
// Template class
//===----------------------------------------------------------------------===//
@@ -1188,126 +934,6 @@ let hasSideEffects = 0, addrMode = BaseI
defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
}
-def IMM_BYTE : SDNodeXForm<imm, [{
- // -1 etc is represented as 255 etc
- // assigning to a byte restores our desired signed value.
- int8_t imm = N->getSExtValue();
- return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
-}]>;
-
-def IMM_HALF : SDNodeXForm<imm, [{
- // -1 etc is represented as 65535 etc
- // assigning to a short restores our desired signed value.
- int16_t imm = N->getSExtValue();
- return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
-}]>;
-
-def IMM_WORD : SDNodeXForm<imm, [{
- // -1 etc can be represented as 4294967295 etc
- // Currently, it's not doing this. But some optimization
- // might convert -1 to a large +ve number.
- // assigning to a word restores our desired signed value.
- int32_t imm = N->getSExtValue();
- return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
-}]>;
-
-def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
-def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
-def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
-
-// Emit store-immediate, but only when the stored value will not be constant-
-// extended. The reason for that is that there is no pass that can optimize
-// constant extenders in store-immediate instructions. In some cases we can
-// end up with a number of such stores, all of which store the same extended
-// value (e.g. after unrolling a loop that initializes floating point array).
-
-// Predicates to determine if the 16-bit immediate is expressible as a sign-
-// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
-// beyond 0..15, so we don't care what is in there.
-
-def i16in8ImmPred: PatLeaf<(i32 imm), [{
- int64_t v = (int16_t)N->getSExtValue();
- return v == (int64_t)(int8_t)v;
-}]>;
-
-// Predicates to determine if the 32-bit immediate is expressible as a sign-
-// extended 8-bit immediate.
-def i32in8ImmPred: PatLeaf<(i32 imm), [{
- int64_t v = (int32_t)N->getSExtValue();
- return v == (int64_t)(int8_t)v;
-}]>;
-
-
-let AddedComplexity = 40 in {
- // Even though the offset is not extendable in the store-immediate, we
- // can still generate the fi# in the base address. If the final offset
- // is not valid for the instruction, we will replace it with a scratch
- // register.
-// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
-// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
-// S4_storeirh_io>;
-// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;
-
-// defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
-// S4_storeirb_io>;
-// defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred,
-// ToImmHalf, S4_storeirh_io>;
-// defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
-// S4_storeiri_io>;
-
- defm: Storexm_add_pat<truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
- S4_storeirb_io>;
- defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf,
- S4_storeirh_io>;
- defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
- S4_storeiri_io>;
-}
-
-def: Storexm_simple_pat<truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
-def: Storexm_simple_pat<truncstorei16, s32_0ImmPred, ToImmHalf, S4_storeirh_io>;
-def: Storexm_simple_pat<store, s32_0ImmPred, ToImmWord, S4_storeiri_io>;
-
-// memb(Rx++#s4:0:circ(Mu))=Rt
-// memb(Rx++I:circ(Mu))=Rt
-// memb(Rx++Mu)=Rt
-// memb(Rx++Mu:brev)=Rt
-// memb(gp+#u16:0)=Rt
-
-// Store halfword.
-// TODO: needs to be implemented
-// memh(Re=#U6)=Rt.H
-// memh(Rs+#s11:1)=Rt.H
-// memh(Rs+Ru<<#u2)=Rt.H
-// TODO: needs to be implemented.
-
-// memh(Ru<<#u2+#U6)=Rt.H
-// memh(Rx++#s4:1:circ(Mu))=Rt.H
-// memh(Rx++#s4:1:circ(Mu))=Rt
-// memh(Rx++I:circ(Mu))=Rt.H
-// memh(Rx++I:circ(Mu))=Rt
-// memh(Rx++Mu)=Rt.H
-// memh(Rx++Mu)=Rt
-// memh(Rx++Mu:brev)=Rt.H
-// memh(Rx++Mu:brev)=Rt
-// memh(gp+#u16:1)=Rt
-// if ([!]Pv[.new]) memh(#u6)=Rt.H
-// if ([!]Pv[.new]) memh(#u6)=Rt
-
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
-// TODO: needs to be implemented.
-
-// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
-// TODO: Needs to be implemented.
-
-// Store word.
-// memw(Re=#U6)=Rt
-// TODO: Needs to be implemented.
-// memw(Rx++#s4:2)=Rt
-// memw(Rx++#s4:2:circ(Mu))=Rt
-// memw(Rx++I:circ(Mu))=Rt
-// memw(Rx++Mu)=Rt
-// memw(Rx++Mu:brev)=Rt
-
//===----------------------------------------------------------------------===
// ST -
//===----------------------------------------------------------------------===
@@ -1875,48 +1501,6 @@ def C4_and_orn : T_LOGICAL_3OP<"and", "
def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
-// op(Ps, op(Pt, Pu))
-class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
- : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
- (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
-
-// op(Ps, op(Pt, ~Pu))
-class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
- : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
- (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
-
-def: LogLog_pat<and, and, C4_and_and>;
-def: LogLog_pat<and, or, C4_and_or>;
-def: LogLog_pat<or, and, C4_or_and>;
-def: LogLog_pat<or, or, C4_or_or>;
-
-def: LogLogNot_pat<and, and, C4_and_andn>;
-def: LogLogNot_pat<and, or, C4_and_orn>;
-def: LogLogNot_pat<or, and, C4_or_andn>;
-def: LogLogNot_pat<or, or, C4_or_orn>;
-
-//===----------------------------------------------------------------------===//
-// PIC: Support for PIC compilations. The patterns and SD nodes defined
-// below are needed to support code generation for PIC
-//===----------------------------------------------------------------------===//
-
-def SDT_HexagonAtGot
- : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
-def SDT_HexagonAtPcrel
- : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-// AT_GOT address-of-GOT, address-of-global, offset-in-global
-def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
-// AT_PCREL address-of-global
-def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
-
-def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
- (L2_loadri_io I32:$got, imm:$addr)>;
-def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
- (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
-def: Pat<(HexagonAtPcrel I32:$addr),
- (C4_addipc imm:$addr)>;
-
//===----------------------------------------------------------------------===//
// CR -
//===----------------------------------------------------------------------===//
@@ -1929,11 +1513,6 @@ def: Pat<(HexagonAtPcrel I32:$addr),
def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
-def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
- (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
- (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-
let hasNewValue = 1, hasSideEffects = 0 in
def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
@@ -1954,9 +1533,7 @@ let isExtentSigned = 1, hasNewValue = 1,
opExtendable = 3 in
def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
(ins IntRegs:$Rs, IntRegs:$Ru, s6_0Ext:$s6),
- "$Rd = add($Rs, add($Ru, #$s6))" ,
- [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s32_0ImmPred:$s6)))],
+ "$Rd = add($Rs, add($Ru, #$s6))" , [],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -1996,34 +1573,6 @@ def S4_subaddi: ALU64Inst <(outs IntRegs
let Inst{4-0} = Ru;
}
-// Rd=add(Rs,sub(#s6,Ru))
-def: Pat<(add (i32 IntRegs:$src1), (sub s32_0ImmPred:$src2,
- (i32 IntRegs:$src3))),
- (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
-
-// Rd=sub(add(Rs,#s6),Ru)
-def: Pat<(sub (add (i32 IntRegs:$src1), s32_0ImmPred:$src2),
- (i32 IntRegs:$src3)),
- (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
-
-// Rd=add(sub(Rs,Ru),#s6)
-def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
- (s32_0ImmPred:$src2)),
- (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
-
-
-// Add or subtract doublewords with carry.
-//TODO:
-// Rdd=add(Rss,Rtt,Px):carry
-//TODO:
-// Rdd=sub(Rss,Rtt,Px):carry
-
-// Extract bitfield
-// Rdd=extract(Rss,#u6,#U6)
-// Rdd=extract(Rss,Rtt)
-// Rd=extract(Rs,Rtt)
-// Rd=extract(Rs,#u5,#U5)
-
def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6_0Imm>;
@@ -2056,10 +1605,7 @@ let hasSideEffects = 0 in
def M4_xor_xacc
: SInst <(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx ^= xor($Rss, $Rtt)",
- [(set (i64 DoubleRegs:$Rxx),
- (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss),
- (i64 DoubleRegs:$Rtt))))],
+ "$Rxx ^= xor($Rss, $Rtt)", [],
"$dst2 = $Rxx", S_3op_tc_1_SLOT23> {
bits<5> Rxx;
bits<5> Rss;
@@ -2160,9 +1706,7 @@ let isExtentSigned = 1, hasNewValue = 1,
def S4_or_andix:
ALU64Inst<(outs IntRegs:$Rx),
(ins IntRegs:$Ru, IntRegs:$_src_, s10_0Ext:$s10),
- "$Rx = or($Ru, and($_src_, #$s10))" ,
- [(set (i32 IntRegs:$Rx),
- (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)))] ,
+ "$Rx = or($Ru, and($_src_, #$s10))" , [] ,
"$_src_ = $Rx", ALU64_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Ru;
@@ -2281,33 +1825,13 @@ def M4_xor_andn : T_MType_acc_rr < "^= a
def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
-def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
-def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
-def: T_MType_acc_pat2 <M4_or_and, and, or>;
-def: T_MType_acc_pat2 <M4_and_and, and, and>;
-def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
-def: T_MType_acc_pat2 <M4_or_or, or, or>;
-def: T_MType_acc_pat2 <M4_and_or, or, and>;
-def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
-
-class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
- : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
- (not IntRegs:$src3)))),
- (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
-
-def: T_MType_acc_pat3 <M4_or_andn, and, or>;
-def: T_MType_acc_pat3 <M4_and_andn, and, and>;
-def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
-
// Compound or-or and or-and
let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
opExtentBits = 10, opExtendable = 3 in
class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
: MInst_acc <(outs IntRegs:$Rx),
(ins IntRegs:$src1, IntRegs:$Rs, s10_0Ext:$s10),
- "$Rx |= "#mnemonic#"($Rs, #$s10)",
- [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
- (OpNode (i32 IntRegs:$Rs), s32_0ImmPred:$s10)))],
+ "$Rx |= "#mnemonic#"($Rs, #$s10)", [],
"$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Rs;
@@ -2378,19 +1902,6 @@ def S2_ct0p : T_COUNT_LEADING_64<"ct0",
def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
-// Count trailing zeros: 64-bit.
-def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
-
-// Count trailing ones: 64-bit.
-def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
-
-// Define leading/trailing patterns that require zero-extensions to 64 bits.
-def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
-def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
-def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
-def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
-
-
let hasSideEffects = 0, hasNewValue = 1 in
def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6_0Imm:$s6),
"$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
@@ -2426,41 +1937,10 @@ def S4_clbpaddi : SInst<(outs IntRegs:$R
def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
-let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
- def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
- (S4_ntstbit_i (i32 IntRegs:$Rs), u5_0ImmPred:$u5)>;
- def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
- (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>;
-}
-
-// Add extra complexity to prefer these instructions over bitsset/bitsclr.
-// The reason is that tstbit/ntstbit can be folded into a compound instruction:
-// if ([!]tstbit(...)) jump ...
-let AddedComplexity = 100 in
-def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
- (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
-
-let AddedComplexity = 100 in
-def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
- (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
-
def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
-// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
-// represented as a compare against "value & 0xFF", which is an exact match
-// for cmpb (same for cmph). The patterns below do not contain any additional
-// complexity that would make them preferable, and if they were actually used
-// instead of cmpb/cmph, they would result in a compare against register that
-// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
-def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)),
- (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>;
-def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
- (C4_nbitsclr I32:$Rs, I32:$Rt)>;
-def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
- (C4_nbitsset I32:$Rs, I32:$Rt)>;
-
//===----------------------------------------------------------------------===//
// XTYPE/BIT -
//===----------------------------------------------------------------------===//
@@ -2474,10 +1954,7 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$R
let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
(ins u6_0Ext:$u6, IntRegs:$Rs, u6_0Imm:$U6),
- "$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
- [(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), u6_0ImmPred:$U6),
- u32_0ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [],"",ALU64_tc_3x_SLOT23> {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2500,10 +1977,7 @@ let CextOpcode = "ADD_MPY", InputType =
isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
(ins u6_0Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
- [(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32_0ImmPred:$u6))],
- "", ALU64_tc_3x_SLOT23>, ImmRegRel {
+ "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [], "", ALU64_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2524,9 +1998,7 @@ let hasNewValue = 1 in
class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
: ALU64Inst <(outs IntRegs:$dst), ins,
"$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))",
- "#$src2, $src3))"),
- [(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))],
+ "#$src2, $src3))"), [],
"", ALU64_tc_3x_SLOT23> {
bits<5> dst;
bits<5> src1;
@@ -2559,9 +2031,7 @@ def M4_mpyri_addr : T_AddMpy<0b1, u32_0I
let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in
def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
(ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
- "$Rx = add($Ru, mpyi($_src_, $Rs))",
- [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru),
- (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))],
+ "$Rx = add($Ru, mpyi($_src_, $Rs))", [],
"$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Ru;
@@ -2652,7 +2122,6 @@ class T_vcmpImm <string Str, bits<2> cmp
// Vector compare bytes
def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>;
-def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in
def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>;
@@ -2714,40 +2183,6 @@ defm S4_andi : T_ShiftOperate<"and", 0b0
defm S4_ori : T_ShiftOperate<"or", 0b01, ALU64_tc_1_SLOT23>;
defm S4_subi : T_ShiftOperate<"sub", 0b11, ALU64_tc_1_SLOT23>;
-class T_Shift_CommOp_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
- : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8),
- (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
-
-let AddedComplexity = 200 in {
- def : T_Shift_CommOp_pat <S4_addi_asl_ri, add, shl>;
- def : T_Shift_CommOp_pat <S4_addi_lsr_ri, add, srl>;
- def : T_Shift_CommOp_pat <S4_andi_asl_ri, and, shl>;
- def : T_Shift_CommOp_pat <S4_andi_lsr_ri, and, srl>;
-}
-
-let AddedComplexity = 30 in {
- def : T_Shift_CommOp_pat <S4_ori_asl_ri, or, shl>;
- def : T_Shift_CommOp_pat <S4_ori_lsr_ri, or, srl>;
-}
-
-class T_Shift_Op_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
- : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)),
- (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
-
-def : T_Shift_Op_pat <S4_subi_asl_ri, sub, shl>;
-def : T_Shift_Op_pat <S4_subi_lsr_ri, sub, srl>;
-
-let AddedComplexity = 200 in {
- def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
- (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
- def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
- (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
- def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
- (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
- def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
- (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
-}
-
// Vector conditional negate
// Rdd=vcnegh(Rss,Rt)
let Defs = [USR_OVF], Itinerary = S_3op_tc_2_SLOT23 in
@@ -2837,10 +2272,7 @@ def A4_vrminuw : T_S3op_6 < "vrminuw", 0
// Shift an immediate left by register amount.
let hasNewValue = 1, hasSideEffects = 0 in
def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6_0Imm:$s6, IntRegs:$Rt),
- "$Rd = lsl(#$s6, $Rt)" ,
- [(set (i32 IntRegs:$Rd), (shl s6_0ImmPred:$s6,
- (i32 IntRegs:$Rt)))],
- "", S_3op_tc_1_SLOT23> {
+ "$Rd = lsl(#$s6, $Rt)" , [], "", S_3op_tc_1_SLOT23> {
bits<5> Rd;
bits<6> s6;
bits<5> Rt;
@@ -2863,71 +2295,6 @@ def S4_lsli: SInst <(outs IntRegs:$Rd),
// MEMOP
//===----------------------------------------------------------------------===//
-def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
- int8_t v = (int8_t)N->getSExtValue();
- return v > -32 && v <= -1;
-}]>;
-
-def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
- int16_t v = (int16_t)N->getSExtValue();
- return v > -32 && v <= -1;
-}]>;
-
-def Clr5Imm8Pred : PatLeaf<(i32 imm), [{
- uint32_t v = (uint8_t)~N->getZExtValue();
- return ImmIsSingleBit(v);
-}]>;
-
-def Clr5Imm16Pred : PatLeaf<(i32 imm), [{
- uint32_t v = (uint16_t)~N->getZExtValue();
- return ImmIsSingleBit(v);
-}]>;
-
-def Set5Imm8 : SDNodeXForm<imm, [{
- uint32_t imm = (uint8_t)N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Set5Imm16 : SDNodeXForm<imm, [{
- uint32_t imm = (uint16_t)N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Set5Imm32 : SDNodeXForm<imm, [{
- uint32_t imm = (uint32_t)N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Clr5Imm8 : SDNodeXForm<imm, [{
- uint32_t imm = (uint8_t)~N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Clr5Imm16 : SDNodeXForm<imm, [{
- uint32_t imm = (uint16_t)~N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Clr5Imm32 : SDNodeXForm<imm, [{
- int32_t imm = (int32_t)~N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def NegImm8 : SDNodeXForm<imm, [{
- int8_t V = N->getSExtValue();
- return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32);
-}]>;
-
-def NegImm16 : SDNodeXForm<imm, [{
- int16_t V = N->getSExtValue();
- return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32);
-}]>;
-
-def NegImm32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
-}]>;
-
-def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>;
//===----------------------------------------------------------------------===//
// Template class for MemOp instructions with the register value.
@@ -3026,235 +2393,6 @@ let isExtendable = 1, opExtendable = 1,
}
-multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
- InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs),
- (MI I32:$Rs, 0, I32:$A)>;
- // Addr: fi
- def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs),
- (MI AddrFI:$Rs, 0, I32:$A)>;
-}
-
-multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A),
- (add I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, I32:$A)>;
- def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A),
- (orisadd I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, I32:$A)>;
- // Addr: fi
- def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A),
- (add AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
- def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A),
- (orisadd AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
-}
-
-multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, InstHexagon MI> {
- defm: Memopxr_simple_pat <Load, Store, Oper, MI>;
- defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
-}
-
-let AddedComplexity = 180 in {
- // add reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
- /*anyext*/ L4_add_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
- /*sext*/ L4_add_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
- /*zext*/ L4_add_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
- /*anyext*/ L4_add_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
- /*sext*/ L4_add_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
- /*zext*/ L4_add_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;
-
- // sub reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
- /*anyext*/ L4_sub_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
- /*sext*/ L4_sub_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
- /*zext*/ L4_sub_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
- /*anyext*/ L4_sub_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
- /*sext*/ L4_sub_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
- /*zext*/ L4_sub_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
-
- // and reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
- /*anyext*/ L4_and_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
- /*sext*/ L4_and_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
- /*zext*/ L4_and_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
- /*anyext*/ L4_and_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
- /*sext*/ L4_and_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
- /*zext*/ L4_and_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
-
- // or reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
- /*anyext*/ L4_or_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
- /*sext*/ L4_or_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
- /*zext*/ L4_or_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
- /*anyext*/ L4_or_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
- /*sext*/ L4_or_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
- /*zext*/ L4_or_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
-}
-
-
-multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
- PatFrag Arg, SDNodeXForm ArgMod,
- InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
- (MI I32:$Rs, 0, (ArgMod Arg:$A))>;
- // Addr: fi
- def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
- (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
-}
-
-multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
- InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A),
- (add I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
- def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$A),
- (orisadd I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
- // Addr: fi
- def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
- (add AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
- def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
- (orisadd AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
-}
-
-multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
- InstHexagon MI> {
- defm: Memopxi_simple_pat <Load, Store, Oper, Arg, ArgMod, MI>;
- defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
-}
-
-
-let AddedComplexity = 200 in {
- // add imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
- /*anyext*/ IdImm, L4_iadd_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
- /*sext*/ IdImm, L4_iadd_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
- /*zext*/ IdImm, L4_iadd_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
- /*anyext*/ IdImm, L4_iadd_memoph_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
- /*sext*/ IdImm, L4_iadd_memoph_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
- /*zext*/ IdImm, L4_iadd_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
- L4_iadd_memopw_io>;
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
- /*anyext*/ NegImm8, L4_iadd_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
- /*sext*/ NegImm8, L4_iadd_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
- /*zext*/ NegImm8, L4_iadd_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
- /*anyext*/ NegImm16, L4_iadd_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
- /*sext*/ NegImm16, L4_iadd_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
- /*zext*/ NegImm16, L4_iadd_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
- L4_iadd_memopw_io>;
-
- // sub imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
- /*anyext*/ IdImm, L4_isub_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
- /*sext*/ IdImm, L4_isub_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
- /*zext*/ IdImm, L4_isub_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
- /*anyext*/ IdImm, L4_isub_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
- /*sext*/ IdImm, L4_isub_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
- /*zext*/ IdImm, L4_isub_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
- L4_isub_memopw_io>;
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
- /*anyext*/ NegImm8, L4_isub_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
- /*sext*/ NegImm8, L4_isub_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
- /*zext*/ NegImm8, L4_isub_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
- /*anyext*/ NegImm16, L4_isub_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
- /*sext*/ NegImm16, L4_isub_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
- /*zext*/ NegImm16, L4_isub_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
- L4_isub_memopw_io>;
-
- // clrbit imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
- /*anyext*/ Clr5Imm8, L4_iand_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
- /*sext*/ Clr5Imm8, L4_iand_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
- /*zext*/ Clr5Imm8, L4_iand_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
- /*anyext*/ Clr5Imm16, L4_iand_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
- /*sext*/ Clr5Imm16, L4_iand_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
- /*zext*/ Clr5Imm16, L4_iand_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, and, Clr5ImmPred, Clr5Imm32,
- L4_iand_memopw_io>;
-
- // setbit imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
- /*anyext*/ Set5Imm8, L4_ior_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
- /*sext*/ Set5Imm8, L4_ior_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
- /*zext*/ Set5Imm8, L4_ior_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
- /*anyext*/ Set5Imm16, L4_ior_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
- /*sext*/ Set5Imm16, L4_ior_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
- /*zext*/ Set5Imm16, L4_ior_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, or, Set5ImmPred, Set5Imm32,
- L4_ior_memopw_io>;
-}
-
//===----------------------------------------------------------------------===//
// XTYPE/PRED +
//===----------------------------------------------------------------------===//
@@ -3275,54 +2413,6 @@ def C4_cmpneqi : T_CMP <"cmp.eq", 0b00
def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10_0Ext>;
def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9_0Ext>;
-def : T_CMP_pat <C4_cmpneqi, setne, s32_0ImmPred>;
-def : T_CMP_pat <C4_cmpltei, setle, s32_0ImmPred>;
-def : T_CMP_pat <C4_cmplteui, setule, u9_0ImmPred>;
-
-// rs <= rt -> !(rs > rt).
-/*
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>;
-// (C4_cmpltei IntRegs:$src1, s32_0ImmPred:$src2)>;
-*/
-// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
- (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>;
-
-// rs != rt -> !(rs == rt).
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
- (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_BYTE : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformU7ToU7M1Imm(imm, SDLoc(N));
-}]>;
-
-// For the sequence
-// zext( setult ( and(Rs, 255), u8))
-// Use the isdigit transformation below
-
-// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)'
-// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
-// The isdigit transformation relies on two 'clever' aspects:
-// 1) The data type is unsigned which allows us to eliminate a zero test after
-// biasing the expression by 48. We are depending on the representation of
-// the unsigned types, and semantics.
-// 2) The front end has converted <= 9 into < 10 on entry to LLVM
-//
-// For the C code:
-// retval = ((c>='0') & (c<='9')) ? 1 : 0;
-// The code is transformed upstream of llvm into
-// retval = (c-48) < 10 ? 1 : 0;
-let AddedComplexity = 139 in
-def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
- u7_0StrictPosImmPred:$src2)))),
- (C2_muxii (A4_cmpbgtui IntRegs:$src1,
- (DEC_CONST_BYTE u7_0StrictPosImmPred:$src2)),
- 0, 1)>;
-
//===----------------------------------------------------------------------===//
// XTYPE/PRED -
//===----------------------------------------------------------------------===//
@@ -3717,50 +2807,6 @@ let isNVStorable = 0, accessSize = HalfW
def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
u16_1Imm, 0b01, 1>, PredNewRel;
-class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
- : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
-
-class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
-
-class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
- : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
-
-class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(Store Value:$val, Addr:$addr),
- (MI Addr:$addr, (ValueMod Value:$val))>;
-
-let AddedComplexity = 30 in {
- def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
- def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
- def: Storea_pat<store, I32, addrga, PS_storeriabs>;
- def: Storea_pat<store, I64, addrga, PS_storerdabs>;
-
- def: Stoream_pat<truncstorei8, I64, addrga, LoReg, PS_storerbabs>;
- def: Stoream_pat<truncstorei16, I64, addrga, LoReg, PS_storerhabs>;
- def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
-}
-
-def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
-def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
-def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
-def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
-
-let AddedComplexity = 100 in {
- def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<store, I32, addrgp, S2_storerigp>;
- def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
-
- // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
- // to "r0 = 1; memw(#foo) = r0"
- let AddedComplexity = 100 in
- def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
-}
-
//===----------------------------------------------------------------------===//
// Template class for non predicated load instructions with
// absolute addressing mode.
@@ -3880,26 +2926,6 @@ defm loadri : LD_Abs<"memw", "LDriw",
let accessSize = DoubleWordAccess in
defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-class LoadAbs_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
- : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))),
- (VT (MI tglobaladdr:$absaddr))>;
-
-let AddedComplexity = 30 in {
- def: LoadAbs_pats <load, PS_loadriabs>;
- def: LoadAbs_pats <zextloadi1, PS_loadrubabs>;
- def: LoadAbs_pats <sextloadi8, PS_loadrbabs>;
- def: LoadAbs_pats <extloadi8, PS_loadrubabs>;
- def: LoadAbs_pats <zextloadi8, PS_loadrubabs>;
- def: LoadAbs_pats <sextloadi16, PS_loadrhabs>;
- def: LoadAbs_pats <extloadi16, PS_loadruhabs>;
- def: LoadAbs_pats <zextloadi16, PS_loadruhabs>;
- def: LoadAbs_pats <load, PS_loadrdabs, i64>;
-}
-
-let AddedComplexity = 30 in
-def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))),
- (Zext64 (PS_loadrubabs tglobaladdr:$absaddr))>;
-
//===----------------------------------------------------------------------===//
// multiclass for load instructions with GP-relative addressing mode.
// Rx=mem[bhwd](##global)
@@ -3930,148 +2956,10 @@ def L2_loadrigp : T_LoadGP<"memw", "LD
let accessSize = DoubleWordAccess in
def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
-def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
-def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
-def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-def: Loadam_pat<load, i1, addrga, I32toI1, PS_loadrubabs>;
-def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
-
-def: Stoream_pat<store, I1, addrga, I1toI32, PS_storerbabs>;
-def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
-
-// Map from load(globaladdress) -> mem[u][bhwd](#foo)
-class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
- : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
- (VT (MI tglobaladdr:$global))>;
-
-let AddedComplexity = 100 in {
- def: LoadGP_pats <extloadi8, L2_loadrubgp>;
- def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
- def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
- def: LoadGP_pats <extloadi16, L2_loadruhgp>;
- def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
- def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
- def: LoadGP_pats <load, L2_loadrigp>;
- def: LoadGP_pats <load, L2_loadrdgp, i64>;
-}
-
-// When the Interprocedural Global Variable optimizer realizes that a certain
-// global variable takes only two constant values, it shrinks the global to
-// a boolean. Catch those loads here in the following 2 patterns.
-let AddedComplexity = 100 in {
- def: LoadGP_pats <extloadi1, L2_loadrubgp>;
- def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
-}
-
-// Transfer global address into a register
-def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16_0Ext:$Rs)>;
-def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16_0Ext:$Rs)>;
-def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16_0Ext:$Rs)>;
-
-let AddedComplexity = 30 in {
- def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>;
- def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
- def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>;
-}
-
-let AddedComplexity = 30 in {
- def: Loada_pat<load, i32, u32_0ImmPred, PS_loadriabs>;
- def: Loada_pat<sextloadi8, i32, u32_0ImmPred, PS_loadrbabs>;
- def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>;
- def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
- def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
-}
-
-// Indexed store word - global address.
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 100 in
-defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
-
-// Load from a global address that has only one use in the current basic block.
-let AddedComplexity = 100 in {
- def: Loada_pat<extloadi8, i32, addrga, PS_loadrubabs>;
- def: Loada_pat<sextloadi8, i32, addrga, PS_loadrbabs>;
- def: Loada_pat<zextloadi8, i32, addrga, PS_loadrubabs>;
-
- def: Loada_pat<extloadi16, i32, addrga, PS_loadruhabs>;
- def: Loada_pat<sextloadi16, i32, addrga, PS_loadrhabs>;
- def: Loada_pat<zextloadi16, i32, addrga, PS_loadruhabs>;
-
- def: Loada_pat<load, i32, addrga, PS_loadriabs>;
- def: Loada_pat<load, i64, addrga, PS_loadrdabs>;
-}
-
-// Store to a global address that has only one use in the current basic block.
-let AddedComplexity = 100 in {
- def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
- def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
- def: Storea_pat<store, I32, addrga, PS_storeriabs>;
- def: Storea_pat<store, I64, addrga, PS_storerdabs>;
-
- def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
-}
-
-// i8/i16/i32 -> i64 loads
-// We need a complexity of 120 here to override preceding handling of
-// zextload.
-let AddedComplexity = 120 in {
- def: Loadam_pat<extloadi8, i64, addrga, Zext64, PS_loadrubabs>;
- def: Loadam_pat<sextloadi8, i64, addrga, Sext64, PS_loadrbabs>;
- def: Loadam_pat<zextloadi8, i64, addrga, Zext64, PS_loadrubabs>;
-
- def: Loadam_pat<extloadi16, i64, addrga, Zext64, PS_loadruhabs>;
- def: Loadam_pat<sextloadi16, i64, addrga, Sext64, PS_loadrhabs>;
- def: Loadam_pat<zextloadi16, i64, addrga, Zext64, PS_loadruhabs>;
-
- def: Loadam_pat<extloadi32, i64, addrga, Zext64, PS_loadriabs>;
- def: Loadam_pat<sextloadi32, i64, addrga, Sext64, PS_loadriabs>;
- def: Loadam_pat<zextloadi32, i64, addrga, Zext64, PS_loadriabs>;
-}
-
-let AddedComplexity = 100 in {
- def: Loada_pat<extloadi8, i32, addrgp, PS_loadrubabs>;
- def: Loada_pat<sextloadi8, i32, addrgp, PS_loadrbabs>;
- def: Loada_pat<zextloadi8, i32, addrgp, PS_loadrubabs>;
-
- def: Loada_pat<extloadi16, i32, addrgp, PS_loadruhabs>;
- def: Loada_pat<sextloadi16, i32, addrgp, PS_loadrhabs>;
- def: Loada_pat<zextloadi16, i32, addrgp, PS_loadruhabs>;
-
- def: Loada_pat<load, i32, addrgp, PS_loadriabs>;
- def: Loada_pat<load, i64, addrgp, PS_loadrdabs>;
-}
-
-let AddedComplexity = 100 in {
- def: Storea_pat<truncstorei8, I32, addrgp, PS_storerbabs>;
- def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>;
- def: Storea_pat<store, I32, addrgp, PS_storeriabs>;
- def: Storea_pat<store, I64, addrgp, PS_storerdabs>;
-}
-
-def: Loada_pat<atomic_load_8, i32, addrgp, PS_loadrubabs>;
-def: Loada_pat<atomic_load_16, i32, addrgp, PS_loadruhabs>;
-def: Loada_pat<atomic_load_32, i32, addrgp, PS_loadriabs>;
-def: Loada_pat<atomic_load_64, i64, addrgp, PS_loadrdabs>;
-
-def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, PS_storerbabs>;
-def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
-def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
-def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
-
let Constraints = "@earlyclobber $dst" in
def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
IntRegs:$c, IntRegs:$d),
- ".error \"Should never try to emit Insert4\"",
- [(set (i64 DoubleRegs:$dst),
- (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
- (i32 16)),
- (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
- (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
- (i32 32))),
- (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
+ ".error \"Should never try to emit Insert4\"", []>;
//===----------------------------------------------------------------------===//
// :raw form of boundscheck:hi:lo insns
@@ -4141,20 +3029,12 @@ def A4_tlbmatch : ALU64Inst<(outs PredRe
let Inst{1-0} = Pd;
}
-// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
-// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
-// We don't really want either one here.
-def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
-def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
- [SDNPHasChain]>;
-
// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
// really do a load.
let hasSideEffects = 1, mayLoad = 0 in
def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
"dcfetch($Rs + #$u11_3)",
- [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)],
- "", LD_tc_ld_SLOT0> {
+ [], "", LD_tc_ld_SLOT0> {
bits<5> Rs;
bits<14> u11_3;
@@ -4166,9 +3046,6 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins
}
-def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)),
- (Y2_dcfetchbo IntRegs:$Rs, u11_3ImmPred:$u11_3)>;
-
//===----------------------------------------------------------------------===//
// Compound instructions
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV5.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV5.td?rev=286036&r1=286035&r2=286036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV5.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV5.td Sat Nov 5 10:01:38 2016
@@ -11,15 +11,6 @@
//
//===----------------------------------------------------------------------===//
-def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
-def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
-
-def ftoi : SDNodeXForm<fpimm, [{
- APInt I = N->getValueAPF().bitcastToAPInt();
- return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N),
- MVT::getIntegerVT(I.getBitWidth()));
-}]>;
-
//===----------------------------------------------------------------------===//
// XTYPE/MPY
//===----------------------------------------------------------------------===//
@@ -52,10 +43,7 @@ let Predicates = [HasV5T] in {
def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
}
-def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm,
- [(set I64:$dst,
- (sra (i64 (add (i64 (sra I64:$src1, u6_0ImmPred:$src2)), 1)),
- (i32 1)))], 1>,
+def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm, [], 1>,
Requires<[HasV5T]> {
bits<6> src2;
let Inst{13-8} = src2;
@@ -76,15 +64,9 @@ def C4_fastcorner9_not : T_LOGICAL_2OP<"
let Inst{20,13,7,4} = 0b1111;
}
-def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, i64>]>;
-
-def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
-
let hasNewValue = 1, validSubTargets = HasV5SubT in
def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = popcount($Rss)",
- [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>,
+ "$Rd = popcount($Rss)", [], "", S_2op_tc_2_SLOT23>,
Requires<[HasV5T]> {
bits<5> Rd;
bits<5> Rss;
@@ -97,76 +79,6 @@ def S5_popcountp : ALU64_rr<(outs IntReg
let Inst{20-16} = Rss;
}
-let AddedComplexity = 20 in {
- defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
- defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
-}
-
-let AddedComplexity = 60 in {
- defm : T_LoadAbsReg_Pat <load, L4_loadri_ur, f32>;
- defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, f64>;
-}
-
-let AddedComplexity = 40 in {
- def: Loadxs_pat<load, f32, L4_loadri_rr>;
- def: Loadxs_pat<load, f64, L4_loadrd_rr>;
-}
-
-let AddedComplexity = 20 in {
- def: Loadxs_simple_pat<load, f32, L4_loadri_rr>;
- def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>;
-}
-
-let AddedComplexity = 80 in {
- def: Loada_pat<load, f32, u32_0ImmPred, PS_loadriabs>;
- def: Loada_pat<load, f32, addrga, PS_loadriabs>;
- def: Loada_pat<load, f64, addrga, PS_loadrdabs>;
-}
-
-let AddedComplexity = 100 in {
- def: LoadGP_pats <load, L2_loadrigp, f32>;
- def: LoadGP_pats <load, L2_loadrdgp, f64>;
-}
-
-let AddedComplexity = 20 in {
- defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
- defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
-}
-
-// Simple patterns should be tried with the least priority.
-def: Storex_simple_pat<store, F32, S2_storeri_io>;
-def: Storex_simple_pat<store, F64, S2_storerd_io>;
-
-let AddedComplexity = 60 in {
- defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, f32, store>;
- defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, f64, store>;
-}
-
-let AddedComplexity = 40 in {
- def: Storexs_pat<store, F32, S4_storeri_rr>;
- def: Storexs_pat<store, F64, S4_storerd_rr>;
-}
-
-let AddedComplexity = 20 in {
- def: Store_rr_pat<store, F32, S4_storeri_rr>;
- def: Store_rr_pat<store, F64, S4_storerd_rr>;
-}
-
-let AddedComplexity = 80 in {
- def: Storea_pat<store, F32, addrga, PS_storeriabs>;
- def: Storea_pat<store, F64, addrga, PS_storerdabs>;
-}
-
-let AddedComplexity = 100 in {
- def: Storea_pat<store, F32, addrgp, S2_storerigp>;
- def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
-}
-
-defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
-defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
-def: Storex_simple_pat<store, F32, S2_storeri_io>;
-def: Storex_simple_pat<store, F64, S2_storerd_io>;
-
let isFP = 1, hasNewValue = 1, opNewValue = 0 in
class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
: MInst<(outs IntRegs:$Rd),
@@ -196,43 +108,11 @@ let isCommutable = 1 in {
def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
-def: Pat<(fadd F32:$src1, F32:$src2),
- (F2_sfadd F32:$src1, F32:$src2)>;
-
-def: Pat<(fsub F32:$src1, F32:$src2),
- (F2_sfsub F32:$src1, F32:$src2)>;
-
-def: Pat<(fmul F32:$src1, F32:$src2),
- (F2_sfmpy F32:$src1, F32:$src2)>;
-
let Itinerary = M_tc_3x_SLOT23 in {
def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
}
-let Predicates = [HasV5T] in {
- def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>;
- def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>;
-}
-
-let AddedComplexity = 100, Predicates = [HasV5T] in {
- class SfSel12<PatFrag Cmp, InstHexagon MI>
- : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt),
- (MI F32:$Rs, F32:$Rt)>;
- class SfSel21<PatFrag Cmp, InstHexagon MI>
- : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs),
- (MI F32:$Rs, F32:$Rt)>;
-
- def: SfSel12<setolt, F2_sfmin>;
- def: SfSel12<setole, F2_sfmin>;
- def: SfSel12<setogt, F2_sfmax>;
- def: SfSel12<setoge, F2_sfmax>;
- def: SfSel21<setolt, F2_sfmax>;
- def: SfSel21<setole, F2_sfmax>;
- def: SfSel21<setogt, F2_sfmin>;
- def: SfSel21<setoge, F2_sfmin>;
-}
-
let Itinerary = M_tc_3or4x_SLOT23 in {
def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
@@ -283,15 +163,13 @@ class T_fcmp <string mnemonic, RegisterC
}
class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, DoubleRegs, MinOp,
- [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> {
+ : T_fcmp <mnemonic, DoubleRegs, MinOp, []> {
let IClass = 0b1101;
let Inst{27-21} = 0b0010111;
}
class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, IntRegs, MinOp,
- [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> {
+ : T_fcmp <mnemonic, IntRegs, MinOp, []> {
let IClass = 0b1100;
let Inst{27-21} = 0b0111111;
}
@@ -306,260 +184,12 @@ def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", se
def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations.
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasV5T] in
-multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
- (IntMI F32:$src1, F32:$src2)>;
- // DoubleRegs
- def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
- (DoubleMI F64:$src1, F64:$src2)>;
-}
-
-defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>;
-defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>;
-defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
-//===----------------------------------------------------------------------===//
-let Predicates = [HasV5T] in
-multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (IntMI F32:$src1, F32:$src2))>;
-
- // DoubleRegs
- def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (DoubleMI F64:$src1, F64:$src2))>;
-}
-
-defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
-defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
-defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for the following dags:
-// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
-// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
-// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
-// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
-//===----------------------------------------------------------------------===//
-let Predicates = [HasV5T] in
-multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
- InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (C2_not (IntMI F32:$src1, F32:$src2))>;
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (IntMI F32:$src1, F32:$src2)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (IntMI F32:$src1, F32:$src2)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (C2_not (IntMI F32:$src1, F32:$src2))>;
-
- // DoubleRegs
- def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (C2_not (DoubleMI F64:$src1, F64:$src2))>;
- def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
- (DoubleMI F64:$src1, F64:$src2)>;
- def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (DoubleMI F64:$src1, F64:$src2)>;
- def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
- (C2_not (DoubleMI F64:$src1, F64:$src2))>;
-}
-
-defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
-defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
-defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for the following dags:
-// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
-// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
-// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
-// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
-//===----------------------------------------------------------------------===//
-let Predicates = [HasV5T] in
-multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
- InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (C2_not (IntMI F32:$src2, F32:$src1))>;
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (IntMI F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (IntMI F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (C2_not (IntMI F32:$src2, F32:$src1))>;
-
- // DoubleRegs
- def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (C2_not (DoubleMI F64:$src2, F64:$src1))>;
- def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
- (DoubleMI F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (DoubleMI F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (C2_not (DoubleMI F64:$src2, F64:$src1))>;
-}
-
-defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
-defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
-
-
-// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (seto F32:$src1, F32:$src2)),
- (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
- def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)),
- (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
- def: Pat<(i1 (seto F64:$src1, F64:$src2)),
- (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
- def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)),
- (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>;
-}
-
-// Ordered lt.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
- (F2_sfcmpgt F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)),
- (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
- def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
- (F2_dfcmpgt F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)),
- (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
-}
-
-// Unordered lt.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setult F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (F2_sfcmpgt F32:$src2, F32:$src1))>;
- def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
- (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
- def: Pat<(i1 (setult F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (F2_dfcmpgt F64:$src2, F64:$src1))>;
- def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
- (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>;
-}
-
-// Ordered le.
-let Predicates = [HasV5T] in {
- // rs <= rt -> rt >= rs.
- def: Pat<(i1 (setole F32:$src1, F32:$src2)),
- (F2_sfcmpge F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)),
- (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
-
- // Rss <= Rtt -> Rtt >= Rss.
- def: Pat<(i1 (setole F64:$src1, F64:$src2)),
- (F2_dfcmpge F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)),
- (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
-}
-
-// Unordered le.
-let Predicates = [HasV5T] in {
-// rs <= rt -> rt >= rs.
- def: Pat<(i1 (setule F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (F2_sfcmpge F32:$src2, F32:$src1))>;
- def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
- (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
- def: Pat<(i1 (setule F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (F2_dfcmpge F64:$src2, F64:$src1))>;
- def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
- (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>;
-}
-
-// Ordered ne.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setone F32:$src1, F32:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
- def: Pat<(i1 (setone F64:$src1, F64:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
- def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
- def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
-}
-
-// Unordered ne.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setune F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
- def: Pat<(i1 (setune F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
- def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
- (C2_not (F2_sfcmpeq F32:$src1,
- (f32 (A2_tfrsi (ftoi $src2))))))>;
- def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
- (C2_not (F2_dfcmpeq F64:$src1,
- (CONST64 (ftoi $src2)))))>;
-}
-
-// Besides set[o|u][comparisons], we also need set[comparisons].
-let Predicates = [HasV5T] in {
- // lt.
- def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
- (F2_sfcmpgt F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)),
- (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
- def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
- (F2_dfcmpgt F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)),
- (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
-
- // le.
- // rs <= rt -> rt >= rs.
- def: Pat<(i1 (setle F32:$src1, F32:$src2)),
- (F2_sfcmpge F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)),
- (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
-
- // Rss <= Rtt -> Rtt >= Rss.
- def: Pat<(i1 (setle F64:$src1, F64:$src2)),
- (F2_dfcmpge F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)),
- (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
-
- // ne.
- def: Pat<(i1 (setne F32:$src1, F32:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
- def: Pat<(i1 (setne F64:$src1, F64:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
- def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
- def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
-}
-
// F2 convert template classes:
let Uses = [USR], isFP = 1 in
class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
- "$Rdd = "#mnemonic#"($Rss)"#chop,
- [(set RCOut:$Rdd, (Op RCIn:$Rss))], "",
+ "$Rdd = "#mnemonic#"($Rss)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rdd;
bits<5> Rss;
@@ -574,11 +204,9 @@ class F2_RDD_RSS_CONVERT<string mnemonic
let Uses = [USR], isFP = 1 in
class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs),
- "$Rdd = "#mnemonic#"($Rs)"#chop,
- [(set RCOut:$Rdd, (Op RCIn:$Rs))], "",
+ "$Rdd = "#mnemonic#"($Rs)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rdd;
bits<5> Rs;
@@ -593,11 +221,9 @@ class F2_RDD_RS_CONVERT<string mnemonic,
let Uses = [USR], isFP = 1, hasNewValue = 1 in
class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = "#mnemonic#"($Rss)"#chop,
- [(set RCOut:$Rd, (Op RCIn:$Rss))], "",
+ "$Rd = "#mnemonic#"($Rss)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rd;
bits<5> Rss;
@@ -613,11 +239,9 @@ class F2_RD_RSS_CONVERT<string mnemonic,
let Uses = [USR], isFP = 1, hasNewValue = 1 in
class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)"#chop,
- [(set RCOut:$Rd, (Op RCIn:$Rs))], "",
+ "$Rd = "#mnemonic#"($Rs)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -632,66 +256,41 @@ class F2_RD_RS_CONVERT<string mnemonic,
}
// Convert single precision to double precision and vice-versa.
-def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000,
- fpextend, F64, F32>;
-
-def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000,
- fpround, F32, F64>;
+def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000>;
+def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000>;
// Convert Integer to Floating Point.
-def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010,
- sint_to_fp, F32, I64>;
-def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001,
- uint_to_fp, F32, I64>;
-def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000,
- uint_to_fp, F32, I32>;
-def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000,
- sint_to_fp, F32, I32>;
-def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011,
- sint_to_fp, F64, I64>;
-def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010,
- uint_to_fp, F64, I64>;
-def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001,
- uint_to_fp, F64, I32>;
-def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010,
- sint_to_fp, F64, I32>;
-
-// Convert Floating Point to Integer - default.
-def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101,
- fp_to_uint, I32, F64, ":chop">;
-def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111,
- fp_to_sint, I32, F64, ":chop">;
+def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010>;
+def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001>;
+def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000>;
+def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000>;
+def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011>;
+def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010>;
+def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001>;
+def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010>;
+
+// Convert Floating Point to Integer.
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, ":chop">;
+def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, ":chop">;
def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001,
- fp_to_uint, I32, F32, ":chop">;
+ ":chop">;
def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001,
- fp_to_sint, I32, F32, ":chop">;
-def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110,
- fp_to_sint, I64, F64, ":chop">;
-def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111,
- fp_to_uint, I64, F64, ":chop">;
-def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110,
- fp_to_sint, I64, F32, ":chop">;
-def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101,
- fp_to_uint, I64, F32, ":chop">;
+ ":chop">;
+def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, ":chop">;
+def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, ":chop">;
+def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, ":chop">;
+def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, ":chop">;
// Convert Floating Point to Integer: non-chopped.
-let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
- def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000,
- fp_to_sint, I64, F64>;
- def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001,
- fp_to_uint, I64, F64>;
- def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011,
- fp_to_uint, I64, F32>;
- def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100,
- fp_to_sint, I64, F32>;
- def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011,
- fp_to_uint, I32, F64>;
- def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100,
- fp_to_sint, I32, F64>;
- def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000,
- fp_to_uint, I32, F32>;
- def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000,
- fp_to_sint, I32, F32>;
+let AddedComplexity = 20, Predicates = [HasV5T] in {
+ def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000>;
+ def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001>;
+ def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011>;
+ def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100>;
+ def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011>;
+ def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100>;
+ def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000>;
+ def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000>;
}
// Fix up radicand.
@@ -710,14 +309,6 @@ def F2_sffixupr: SInst<(outs IntRegs:$Rd
let Inst{4-0} = Rd;
}
-// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5T] in {
- def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
- def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
- def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
- def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
-}
-
// F2_sffma: Floating-point fused multiply add.
let Uses = [USR], isFP = 1, hasNewValue = 1 in
class T_sfmpy_acc <bit isSub, bit isLib>
@@ -747,15 +338,6 @@ def F2_sffms: T_sfmpy_acc <1, 0>;
def F2_sffma_lib: T_sfmpy_acc <0, 1>;
def F2_sffms_lib: T_sfmpy_acc <1, 1>;
-def : Pat <(fma F32:$src2, F32:$src3, F32:$src1),
- (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
-
-def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1),
- (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
-
-def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1),
- (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
-
// Floating-point fused multiply add w/ additional scaling (2**pu).
let Uses = [USR], isFP = 1, hasNewValue = 1 in
def F2_sffma_sc: MInst <
@@ -780,46 +362,6 @@ def F2_sffma_sc: MInst <
let Inst{4-0} = Rx;
}
-def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm),
- (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>,
- Requires<[HasV5T]>;
-
-def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt),
- (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>,
- Requires<[HasV5T]>;
-
-def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
- (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
- Requires<[HasV5T]>;
-
-def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
- (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>,
- Requires<[HasV5T]>;
-
-def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
- (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
- Requires<[HasV5T]>;
-
-def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
- (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
- Requires<[HasV5T]>;
-
-// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = mux(p0, r1, #i)
-def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3),
- (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>,
- Requires<[HasV5T]>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = mux(p0, #i, r1)
-def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3),
- (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>,
- Requires<[HasV5T]>;
-
-def: Pat<(i32 (fp_to_sint F64:$src1)),
- (LoReg (F2_conv_df2d_chop F64:$src1))>,
- Requires<[HasV5T]>;
-
//===----------------------------------------------------------------------===//
// :natural forms of vasrh and vasrhub insns
//===----------------------------------------------------------------------===//
@@ -910,7 +452,7 @@ let Defs = [USR_OVF], Itinerary = S_3op_
// Classify floating-point value
let Uses = [USR], isFP = 1 in
-def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
+def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>, Requires<[HasV5T]>;
let Uses = [USR], isFP = 1 in
def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5_0Imm:$u5),
@@ -953,11 +495,3 @@ let hasNewValue = 1, opNewValue = 0 in {
def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
-
-def : Pat <(fabs (f32 IntRegs:$src1)),
- (S2_clrbit_i (f32 IntRegs:$src1), 31)>,
- Requires<[HasV5T]>;
-
-def : Pat <(fneg (f32 IntRegs:$src1)),
- (S2_togglebit_i (f32 IntRegs:$src1), 31)>,
- Requires<[HasV5T]>;
Modified: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV60.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV60.td?rev=286036&r1=286035&r2=286036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV60.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoV60.td Sat Nov 5 10:01:38 2016
@@ -10,23 +10,6 @@
// This file describes the Hexagon V60 instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
-def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-
// Vector load
let Predicates = [HasV60T, UseHVX] in
let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
@@ -767,96 +750,6 @@ def V6_vS32b_nt_new_npred_ppu : T_vstore
V6_vS32b_nt_new_npred_ppu_enc;
}
-multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
- // Aligned stores
- def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
- (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
- def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
- (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
-
- // 128B Aligned stores
- def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
- (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
- def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
- (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
-
- // Fold Add R+OFF into vector store.
- let AddedComplexity = 10 in {
- def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
- (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
- def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
- (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
-
- // Fold Add R+OFF into vector store 128B.
- def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
- (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
- def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
- (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
- }
-}
-
-defm : vS32b_ai_pats <v64i8, v128i8>;
-defm : vS32b_ai_pats <v32i16, v64i16>;
-defm : vS32b_ai_pats <v16i32, v32i32>;
-defm : vS32b_ai_pats <v8i64, v16i64>;
-
-
-multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
- // Aligned loads
- def : Pat < (VTSgl (alignedload IntRegs:$addr)),
- (V6_vL32b_ai IntRegs:$addr, 0) >,
- Requires<[UseHVXSgl]>;
- def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
- (V6_vL32Ub_ai IntRegs:$addr, 0) >,
- Requires<[UseHVXSgl]>;
-
- // 128B Load
- def : Pat < (VTDbl (alignedload IntRegs:$addr)),
- (V6_vL32b_ai_128B IntRegs:$addr, 0) >,
- Requires<[UseHVXDbl]>;
- def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
- (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >,
- Requires<[UseHVXDbl]>;
-
- // Fold Add R+OFF into vector load.
- let AddedComplexity = 10 in {
- def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
- Requires<[UseHVXDbl]>;
- def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
- Requires<[UseHVXDbl]>;
-
- def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
- Requires<[UseHVXSgl]>;
- def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
- Requires<[UseHVXSgl]>;
- }
-}
-
-defm : vL32b_ai_pats <v64i8, v128i8>;
-defm : vL32b_ai_pats <v32i16, v64i16>;
-defm : vL32b_ai_pats <v16i32, v32i32>;
-defm : vL32b_ai_pats <v8i64, v16i64>;
-
// Vector load/store pseudos
@@ -873,29 +766,6 @@ def PS_vstorerw_ai_128B: STrivv_template
def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
Requires<[HasV60T,UseHVXDbl]>;
-multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
- def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
- (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
- Requires<[UseHVXSgl]>;
- def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
- (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
- Requires<[UseHVXSgl]>;
-
- def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
- (PS_vstorerw_ai_128B IntRegs:$addr, 0,
- (VTDbl VecDblRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
- def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
- (PS_vstorerwu_ai_128B IntRegs:$addr, 0,
- (VTDbl VecDblRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
-}
-
-defm : STrivv_pats <v128i8, v256i8>;
-defm : STrivv_pats <v64i16, v128i16>;
-defm : STrivv_pats <v32i32, v64i32>;
-defm : STrivv_pats <v16i64, v32i64>;
-
let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
class LDrivv_template<RegisterClass RC>
@@ -910,27 +780,6 @@ def PS_vloadrw_ai_128B: LDrivv_template<
def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>,
Requires<[HasV60T,UseHVXDbl]>;
-multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
- def : Pat<(VTSgl (alignedload I32:$addr)),
- (PS_vloadrw_ai I32:$addr, 0)>,
- Requires<[UseHVXSgl]>;
- def : Pat<(VTSgl (unalignedload I32:$addr)),
- (PS_vloadrwu_ai I32:$addr, 0)>,
- Requires<[UseHVXSgl]>;
-
- def : Pat<(VTDbl (alignedload I32:$addr)),
- (PS_vloadrw_ai_128B I32:$addr, 0)>,
- Requires<[UseHVXDbl]>;
- def : Pat<(VTDbl (unalignedload I32:$addr)),
- (PS_vloadrwu_ai_128B I32:$addr, 0)>,
- Requires<[UseHVXDbl]>;
-}
-
-defm : LDrivv_pats <v128i8, v256i8>;
-defm : LDrivv_pats <v64i16, v128i16>;
-defm : LDrivv_pats <v32i32, v64i32>;
-defm : LDrivv_pats <v16i64, v32i64>;
-
// Store vector predicate pseudo.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
@@ -977,20 +826,6 @@ let isCodeGenOnly = 1, isPseudo = 1, has
"", []>, Requires<[HasV60T,UseHVXDbl]>;
}
-let Predicates = [HasV60T,UseHVXSgl] in {
- def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt),
- (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>;
- def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt),
- (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>;
-}
-let Predicates = [HasV60T,UseHVXDbl] in {
- def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt),
- (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>;
- def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt),
- (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>;
-}
-
-
let hasNewValue = 1 in
class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
: CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2),
@@ -1519,20 +1354,6 @@ let isRegSequence = 1, Itinerary = CVI_V
defm V6_vcombine :
T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc;
-def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
- SDTCisSubVecOfVec<1, 0>]>;
-
-def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
-
-def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
- (v16i32 VectorRegs:$Vt))),
- (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
- (v32i32 VecDblRegs:$Vt))),
- (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-
let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in {
defm V6_vsathub :
T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc;
@@ -1593,46 +1414,6 @@ defm V6_vpackoh :
T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc;
}
-def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>,
- SDTCisInt<3>]>;
-
-def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>;
-
-// 0 as the last argument denotes vpacke. 1 denotes vpacko
-def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
- (v64i8 VectorRegs:$Vt), (i32 0))),
- (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
- (v64i8 VectorRegs:$Vt), (i32 1))),
- (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
- (v32i16 VectorRegs:$Vt), (i32 0))),
- (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
- (v32i16 VectorRegs:$Vt), (i32 1))),
- (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-
-def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
- (v128i8 VecDblRegs:$Vt), (i32 0))),
- (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
- (v128i8 VecDblRegs:$Vt), (i32 1))),
- (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
- (v64i16 VecDblRegs:$Vt), (i32 0))),
- (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
- (v64i16 VecDblRegs:$Vt), (i32 1))),
- (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-
let hasNewValue = 1, hasSideEffects = 0 in
class T_HVX_condALU <string asmString, RegisterClass RC1, RegisterClass RC2>
: CVI_VA_Resource1 <(outs RC2:$dst),
Modified: llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoVector.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoVector.td?rev=286036&r1=286035&r2=286036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoVector.td (original)
+++ llvm/trunk/lib/Target/Hexagon/HexagonInstrInfoVector.td Sat Nov 5 10:01:38 2016
@@ -11,37 +11,6 @@
//
//===----------------------------------------------------------------------===//
-def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
-def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
-def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
-def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
-def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
-def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
-def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
-def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
-
-
-multiclass bitconvert_32<ValueType a, ValueType b> {
- def : Pat <(b (bitconvert (a IntRegs:$src))),
- (b IntRegs:$src)>;
- def : Pat <(a (bitconvert (b IntRegs:$src))),
- (a IntRegs:$src)>;
-}
-
-multiclass bitconvert_64<ValueType a, ValueType b> {
- def : Pat <(b (bitconvert (a DoubleRegs:$src))),
- (b DoubleRegs:$src)>;
- def : Pat <(a (bitconvert (b DoubleRegs:$src))),
- (a DoubleRegs:$src)>;
-}
-
-// Bit convert vector types to integers.
-defm : bitconvert_32<v4i8, i32>;
-defm : bitconvert_32<v2i16, i32>;
-defm : bitconvert_64<v8i8, i64>;
-defm : bitconvert_64<v4i16, i64>;
-defm : bitconvert_64<v2i32, i64>;
-
// Vector shift support. Vector shifting in Hexagon is rather different
// from internal representation of LLVM.
// LLVM assumes all shifts (in vector case) will have the form
@@ -51,27 +20,17 @@ defm : bitconvert_64<v2i32, i64>;
// As a result, special care is needed to guarantee correctness and
// performance.
class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u4_0Imm,
- [(set (v4i16 DoubleRegs:$dst),
- (Op (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2))]> {
+ : S_2OpInstImm<Str, MajOp, MinOp, u4_0Imm, []> {
bits<4> src2;
let Inst{11-8} = src2;
}
class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u5_0Imm,
- [(set (v2i32 DoubleRegs:$dst),
- (Op (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2))]> {
+ : S_2OpInstImm<Str, MajOp, MinOp, u5_0Imm, []> {
bits<5> src2;
let Inst{12-8} = src2;
}
-def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
- (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
-
-def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
- (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
-
def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
@@ -80,87 +39,6 @@ def S2_asr_i_vh : vshift_v4i16<sra, "vas
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
-
-def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
-def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
-
-// Replicate the low 8-bits from 32-bits input register into each of the
-// four bytes of 32-bits destination register.
-def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
-
-// Replicate the low 16-bits from 32-bits input register into each of the
-// four halfwords of 64-bits destination register.
-def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
-
-
-class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
- : Pat <(Op Type:$Rss, Type:$Rtt),
- (MI Type:$Rss, Type:$Rtt)>;
-
-def: VArith_pat <A2_vaddub, add, V8I8>;
-def: VArith_pat <A2_vaddh, add, V4I16>;
-def: VArith_pat <A2_vaddw, add, V2I32>;
-def: VArith_pat <A2_vsubub, sub, V8I8>;
-def: VArith_pat <A2_vsubh, sub, V4I16>;
-def: VArith_pat <A2_vsubw, sub, V2I32>;
-
-def: VArith_pat <A2_and, and, V2I16>;
-def: VArith_pat <A2_xor, xor, V2I16>;
-def: VArith_pat <A2_or, or, V2I16>;
-
-def: VArith_pat <A2_andp, and, V8I8>;
-def: VArith_pat <A2_andp, and, V4I16>;
-def: VArith_pat <A2_andp, and, V2I32>;
-def: VArith_pat <A2_orp, or, V8I8>;
-def: VArith_pat <A2_orp, or, V4I16>;
-def: VArith_pat <A2_orp, or, V2I32>;
-def: VArith_pat <A2_xorp, xor, V8I8>;
-def: VArith_pat <A2_xorp, xor, V4I16>;
-def: VArith_pat <A2_xorp, xor, V2I32>;
-
-def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
- (i32 u5_0ImmPred:$c))))),
- (S2_asr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
- (i32 u5_0ImmPred:$c))))),
- (S2_lsr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
- (i32 u5_0ImmPred:$c))))),
- (S2_asl_i_vw V2I32:$b, imm:$c)>;
-
-def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
- (S2_asr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
- (S2_lsr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
- (S2_asl_i_vh V4I16:$b, imm:$c)>;
-
-
-def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
-def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
-
-def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
-def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
-def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
-
-def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)),
- (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)),
- (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
-def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)),
- (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)),
- (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
-def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)),
- (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)),
- (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
-
// Vector shift words by register
def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
@@ -173,65 +51,6 @@ def S2_lsr_r_vh : T_S3op_shiftVect < "vl
def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
-class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
- : Pat <(Op Value:$Rs, I32:$Rt),
- (MI Value:$Rs, I32:$Rt)>;
-
-def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
-def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
-def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
-def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
-def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
-def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
-
-
-def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
-def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
-def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
-
-def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
-def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
-def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
-
-
-class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
- : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
- (MI Value:$Rs, Value:$Rt)>;
-
-def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
-def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
-def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
-
-def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
-def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
-def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
-
-def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
-def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
-def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
-
-
-class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
- : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
- (MI InVal:$Rs, InVal:$Rt)>;
-
-def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
-def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
-def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
-
-def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
-def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
-def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
-
// Hexagon doesn't have a vector multiply with C semantics.
// Instead, generate a pseudo instruction that gets expaneded into two
@@ -239,237 +58,12 @@ def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V
// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1 in
def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "",
- [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>;
let isPseudo = 1 in
def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "",
- [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [],
"$Rd = $Rx">;
-// Adds two v4i8: Hexagon does not have an insn for this one, so we
-// use the double add v8i8, and use only the low part of the result.
-def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
- (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
-
-// Subtract two v4i8: Hexagon does not have an insn for this one, so we
-// use the double sub v8i8, and use only the low part of the result.
-def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
- (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
-
-//
-// No 32 bit vector mux.
-//
-def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
- (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
-def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
- (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
-
-//
-// 64-bit vector mux.
-//
-def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
- (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
-def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
- (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
-def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
- (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
-
-//
-// No 32 bit vector compare.
-//
-def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
- (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
-
-def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
-
-
-class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
- ValueType CmpTy>
- : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
- (InvMI Value:$Rt, Value:$Rs)>;
-
-// Map from a compare operation to the corresponding instruction with the
-// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
-def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
-def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
-def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
-def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
-def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
-def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
-
-def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
-def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
-def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
-def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
-def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
-def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
-
-// Map from vcmpne(Rss) -> !vcmpew(Rss).
-// rs != rt -> !(rs == rt).
-def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
- (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
-
-
-// Truncate: from vector B copy all 'E'ven 'B'yte elements:
-// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
-def: Pat<(v4i8 (trunc V4I16:$Rs)),
- (S2_vtrunehb V4I16:$Rs)>;
-
-// Truncate: from vector B copy all 'O'dd 'B'yte elements:
-// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
-// S2_vtrunohb
-
-// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
-// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
-// S2_vtruneh
-
-def: Pat<(v2i16 (trunc V2I32:$Rs)),
- (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
-
-
-def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
-def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
-
-def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
-def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
-
-def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
-
-// Sign extends a v2i8 into a v2i32.
-def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
- (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
-
-// Sign extends a v2i16 into a v2i32.
-def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
- (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
-
-
-// Multiplies two v2i16 and returns a v2i32. We are using here the
-// saturating multiply, as hexagon does not provide a non saturating
-// vector multiply, and saturation does not impact the result that is
-// in double precision of the operands.
-
-// Multiplies two v2i16 vectors: as Hexagon does not have a multiply
-// with the C semantics for this one, this pattern uses the half word
-// multiply vmpyh that takes two v2i16 and returns a v2i32. This is
-// then truncated to fit this back into a v2i16 and to simulate the
-// wrap around semantics for unsigned in C.
-def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
- (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
-
-def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
- (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
- (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
-
-// Multiplies two v4i16 vectors.
-def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
- (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
- (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
-
-def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
- (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
- (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
-
-// Multiplies two v4i8 vectors.
-def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
- (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
- Requires<[HasV5T]>;
-
-def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
- (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
-
-// Multiplies two v8i8 vectors.
-def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
- (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
- (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
- Requires<[HasV5T]>;
-
-def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
- (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
- (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
-
-
-class shuffler<SDNode Op, string Str>
- : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
- "$a = " # Str # "($b, $c)",
- [(set (i64 DoubleRegs:$a),
- (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
- "", S_3op_tc_1_SLOT23>;
-
-def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
-
-def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
-def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
-def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
-def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
-
-class ShufflePat<InstHexagon MI, SDNode Op>
- : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
- (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
-def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
-
-// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
-def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
-
-// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
-def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
-
-// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
-def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
-
-
-// Truncated store from v4i16 to v4i8.
-def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr),
- [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
-
-// Truncated store from v2i32 to v2i16.
-def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr),
- [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
-
-def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
- (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
- (LoReg $Rs))))>;
-
-def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
- (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
-
-
-// Zero and sign extended load from v2i8 into v2i16.
-def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
- [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
-
-def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
- [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
-
-def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
- (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
-
-def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
- (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
-def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
- (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
-def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
- (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
Added: llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td?rev=286036&view=auto
==============================================================================
--- llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td (added)
+++ llvm/trunk/lib/Target/Hexagon/HexagonPatterns.td Sat Nov 5 10:01:38 2016
@@ -0,0 +1,3273 @@
+// Pattern fragment that combines the value type and the register class
+// into a single parameter.
+// The pat frags in the definitions below need to have a named register,
+// otherwise i32 will be assumed regardless of the register class. The
+// name of the register does not matter.
+def I1 : PatLeaf<(i1 PredRegs:$R)>;
+def I32 : PatLeaf<(i32 IntRegs:$R)>;
+def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
+def F32 : PatLeaf<(f32 IntRegs:$R)>;
+def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
+
+// Pattern fragments to extract the low and high subregisters from a
+// 64-bit value.
+def LoReg: OutPatFrag<(ops node:$Rs),
+ (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
+def HiReg: OutPatFrag<(ops node:$Rs),
+ (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
+
+def orisadd: PatFrag<(ops node:$Addr, node:$off),
+ (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>;
+
+// SDNodeXForm for converting a signed immediate C to C-1.
+def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM1Imm(imm, SDLoc(N));
+}]>;
+
+// SDNodeXForm for converting a signed immediate C to C-2.
+def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-2 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM2Imm(imm, SDLoc(N));
+}]>;
+
+// SDNodeXForm for converting a signed immediate C to C-3.
+def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-3 as an SDNode.
+ int32_t imm = N->getSExtValue();
+ return XformSToSM3Imm(imm, SDLoc(N));
+}]>;
+
+// SDNodeXForm for converting an unsigned immediate C to C-1.
+def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
+ // Return the byte immediate const-1 as an SDNode.
+ uint32_t imm = N->getZExtValue();
+ return XformUToUM1Imm(imm, SDLoc(N));
+}]>;
+
+class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
+ : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)),
+ (MI IntRegs:$src1, ImmPred:$src2)>;
+
+def : T_CMP_pat <C2_cmpeqi, seteq, s10_0ImmPred>;
+def : T_CMP_pat <C2_cmpgti, setgt, s10_0ImmPred>;
+def : T_CMP_pat <C2_cmpgtui, setugt, u9_0ImmPred>;
+
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
+
+// Pats for instruction selection.
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+ : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or, A2_or, i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
+// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
+// that reverse the order of the operands.
+class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
+
+// Pats for compares. They use PatFrags as operands, not SDNodes,
+// since seteq/setgt/etc. are defined as PatFrags.
+class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
+ : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
+def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
+
+def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
+
+def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
+ (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(i32 (add I32:$Rs, s32_0ImmPred:$s16)),
+ (i32 (A2_addi I32:$Rs, imm:$s16))>;
+
+def: Pat<(or (i32 IntRegs:$Rs), s32_0ImmPred:$s10),
+ (A2_orir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(and (i32 IntRegs:$Rs), s32_0ImmPred:$s10),
+ (A2_andir IntRegs:$Rs, imm:$s10)>;
+
+def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+
+// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
+def: Pat<(not (i32 IntRegs:$src1)),
+ (A2_subri -1, IntRegs:$src1)>;
+
+def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
+def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
+
+def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs)),
+ (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
+
+def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8)),
+ (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
+
+def : Pat<(i32 (select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8)),
+ (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
+
+def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
+def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
+
+class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
+ : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
+ (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
+
+def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
+def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
+def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
+def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
+def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
+def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
+
+// Add halfword.
+def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
+ (A2_addh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
+ (A2_addh_l16_hl I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
+ (A2_addh_h16_ll I32:$src1, I32:$src2)>;
+
+// Subtract halfword.
+def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
+ (A2_subh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
+ (A2_subh_h16_ll I32:$src1, I32:$src2)>;
+
+// Here, depending on the operand being selected, we'll either generate a
+// min or max instruction.
+// Ex:
+// (a>b)?a:b --> max(a,b) => The check performed is '>' and the larger of the
+// two values is selected, so the corresponding instruction is passed in 'Inst'.
+// (a>b)?b:a --> min(a,b) => The check performed is '>' but the smaller value
+// is selected, so the corresponding instruction is passed in 'SwapInst'.
+
+multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT,
+ InstHexagon Inst, InstHexagon SwapInst> {
+ def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
+ (VT RC:$src1), (VT RC:$src2)),
+ (Inst RC:$src1, RC:$src2)>;
+ def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
+ (VT RC:$src2), (VT RC:$src1)),
+ (SwapInst RC:$src1, RC:$src2)>;
+}
+
+
+multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;
+
+ def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))),
+ (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))), i16),
+ (Inst IntRegs:$src1, IntRegs:$src2)>;
+
+ def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
+ (i32 PositiveHalfWord:$src2))),
+ (i32 PositiveHalfWord:$src2),
+ (i32 PositiveHalfWord:$src1))), i16),
+ (SwapInst IntRegs:$src1, IntRegs:$src2)>;
+}
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats<setge, A2_max, A2_min>;
+ defm: MinMax_pats<setgt, A2_max, A2_min>;
+ defm: MinMax_pats<setle, A2_min, A2_max>;
+ defm: MinMax_pats<setlt, A2_min, A2_max>;
+ defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setule, A2_minu, A2_maxu>;
+ defm: MinMax_pats<setult, A2_minu, A2_maxu>;
+}
+
+class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
+ : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
+ (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
+
+def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
+def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
+def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
+def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
+def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
+
+def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
+
+def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
+
+def: Pat<(i1 (not (i1 PredRegs:$Ps))),
+ (C2_not PredRegs:$Ps)>;
+
+def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
+
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
+
+def: Pat<(br bb:$dst),
+ (J2_jump brtarget:$dst)>;
+def: Pat<(retflag),
+ (PS_jmpret (i32 R31))>;
+def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+def: Pat<(eh_return),
+ (EH_RETURN_JMPR (i32 R31))>;
+def: Pat<(brind (i32 IntRegs:$dst)),
+ (J2_jumpr IntRegs:$dst)>;
+
+// Patterns to select load-indexed (i.e. load from base+offset).
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+ def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
+ def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
+ def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
+ (VT (MI IntRegs:$Rs, imm:$Off))>;
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
+}
+
+let AddedComplexity = 20 in {
+ defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
+ defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+ defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
+ defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+ defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ // No sextloadi1.
+}
+
+// Sign-extending loads of i1 need to replicate the lowest bit throughout
+// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
+// do the trick.
+let AddedComplexity = 20 in
+def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
+ (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
+
+def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
+
+def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8),
+ (M2_mpysip IntRegs:$Rs, imm:$u8)>;
+def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)),
+ (M2_mpysin IntRegs:$Rs, imm:$u8)>;
+def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2),
+ (M2_mpysmi IntRegs:$src1, imm:$src2)>;
+def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
+ (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
+def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1),
+ (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
+ (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
+def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1),
+ (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
+ PatLeaf ImmPred>
+ : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
+ (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
+
+class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
+ (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32_0ImmPred>;
+
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s32_0ImmPred>;
+def : T_MType_acc_pat2 <M2_nacci, add, sub>;
+
+def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
+def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
+def: T_MType_acc_pat2 <M4_or_and, and, or>;
+def: T_MType_acc_pat2 <M4_and_and, and, and>;
+def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
+def: T_MType_acc_pat2 <M4_or_or, or, or>;
+def: T_MType_acc_pat2 <M4_and_or, or, and>;
+def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
+
+class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
+ (not IntRegs:$src3)))),
+ (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
+
+def: T_MType_acc_pat3 <M4_or_andn, and, or>;
+def: T_MType_acc_pat3 <M4_and_andn, and, and>;
+def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
+
+def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
+ (i64 (anyext (i32 IntRegs:$src2))))),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))),
+ (i64 (sext (i32 IntRegs:$src2))))),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(i64 (mul (is_sext_i32:$src1),
+ (is_sext_i32:$src2))),
+ (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (sext (i32 IntRegs:$src2))),
+ (i64 (sext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (add (i64 DoubleRegs:$src1),
+ (mul (i64 (zext (i32 IntRegs:$src2))),
+ (i64 (zext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (anyext (i32 IntRegs:$src2))),
+ (i64 (anyext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
+ (mul (i64 (zext (i32 IntRegs:$src2))),
+ (i64 (zext (i32 IntRegs:$src3)))))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+// Pattern for a three-operand store fragment: (Store value, base, offset).
+// The base address is always an I32 register; Offset is the predicate that
+// the immediate offset must satisfy. The selected instruction takes the
+// operands in a different order: (base, offset, value).
+// NOTE(review): instantiated below only with post_* store fragments and *_pi
+// instructions, so presumably these are post-increment stores -- confirm.
+class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
+ InstHexagon MI>
+ : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
+ (MI I32:$src2, imm:$offset, Value:$src1)>;
+
+def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
+def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
+def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
+def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
+
+// Patterns for generating stores, where the address takes different forms:
+// - frameindex,
+// - frameindex + offset,
+// - base + offset,
+// - simple (base address without offset).
+// These would usually be used together (via Storex_pat defined below), but
+// in some cases one may want to apply different properties (such as
+// AddedComplexity) to the individual patterns.
+class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI> {
+ def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+ def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+}
+multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI> {
+ def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+ def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+}
+class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, Value:$Rt)>;
+
+// Patterns for generating stores, where the address takes different forms,
+// and where the value being stored is transformed through the value modifier
+// ValueMod. The address forms are same as above.
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi),
+ (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+ def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+}
+multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+ def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+}
+class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
+ (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
+
+multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Storex_fi_pat <Store, Value, MI>;
+ defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+ defm: Storex_add_pat <Store, Value, ImmPred, MI>;
+}
+
+multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+ defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+}
+
+// Regular stores in the DAG have two operands: value and address.
+// Atomic stores also have two, but they are reversed: address, value.
+// To use atomic stores with the patterns, they need to have their operands
+// swapped. This relies on the knowledge that the F.Fragment uses names
+// "ptr" and "val".
+class SwapSt<PatFrag F>
+ : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
+ F.OperandTransform>;
+
+let AddedComplexity = 20 in {
+ defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
+
+ defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
+}
+
+// Simple patterns should be tried with the least priority.
+def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
+def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
+def: Storex_simple_pat<store, I32, S2_storeri_io>;
+def: Storex_simple_pat<store, I64, S2_storerd_io>;
+
+def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
+
+let AddedComplexity = 20 in {
+ defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
+ defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
+ defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
+}
+
+def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
+def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
+def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
+
+def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
+
+def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)),
+ (i32 (sub 0, (i32 IntRegs:$src))),
+ (i32 IntRegs:$src))),
+ (A2_abs IntRegs:$src)>;
+
+let AddedComplexity = 50 in
+def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)),
+ (i32 IntRegs:$src)),
+ (sra (i32 IntRegs:$src), (i32 31)))),
+ (A2_abs IntRegs:$src)>;
+
+def: Pat<(sra (i32 IntRegs:$src), u5_0ImmPred:$u5),
+ (S2_asr_i_r IntRegs:$src, imm:$u5)>;
+def: Pat<(srl (i32 IntRegs:$src), u5_0ImmPred:$u5),
+ (S2_lsr_i_r IntRegs:$src, imm:$u5)>;
+def: Pat<(shl (i32 IntRegs:$src), u5_0ImmPred:$u5),
+ (S2_asl_i_r IntRegs:$src, imm:$u5)>;
+
+def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5_0ImmPred:$src2)),
+ (i32 1))),
+ (i32 1))),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>;
+
+def : Pat<(not (i64 DoubleRegs:$src1)),
+ (A2_notp DoubleRegs:$src1)>;
+
+// Count leading zeros.
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+
+// Count trailing zeros: 32-bit.
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
+
+// Count leading ones.
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+
+// Count trailing ones: 32-bit.
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
+
+def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5_0ImmPred:$u5)))),
+ (S2_clrbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))),
+ (S2_setbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5_0ImmPred:$u5))),
+ (S2_togglebit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))),
+ (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
+ (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
+ (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
+ (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
+ def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
+ (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (trunc (i32 IntRegs:$Rs))),
+ (S2_tstbit_i IntRegs:$Rs, 0)>;
+ def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
+ (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
+}
+
+let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6_0ImmPred:$u6), 0)),
+ (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>;
+ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)),
+ (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
+}
+
+let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
+def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
+ (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
+ (i32 8)),
+ (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
+ (zextloadi8 (i32 IntRegs:$b))),
+ (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
+
+// Patterns for loads of i1:
+def: Pat<(i1 (load AddrFI:$fi)),
+ (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
+def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32_0ImmPred:$Off))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
+def: Pat<(i1 (load (i32 IntRegs:$Rs))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
+
+// Output fragment turning an i1 predicate into an i32 value: selects between
+// the immediates 1 and 0 with C2_muxii. This is the same expansion used by
+// the (i32 (zext i1)) pattern in this file.
+def I1toI32: OutPatFrag<(ops node:$Rs),
+ (C2_muxii (i1 $Rs), 1, 0)>;
+
+// Output fragment turning an i32 value into an i1 predicate by wrapping it in
+// C2_tfrrp.
+// NOTE(review): presumably C2_tfrrp is the register-to-predicate transfer;
+// which bits of $Rs determine the resulting predicate is not visible here.
+def I32toI1: OutPatFrag<(ops node:$Rs),
+ (i1 (C2_tfrrp (i32 $Rs)))>;
+
+defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
+def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
+
+def: Pat<(sra (i64 DoubleRegs:$src), u6_0ImmPred:$u6),
+ (S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
+def: Pat<(srl (i64 DoubleRegs:$src), u6_0ImmPred:$u6),
+ (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>;
+def: Pat<(shl (i64 DoubleRegs:$src), u6_0ImmPred:$u6),
+ (S2_asl_i_p DoubleRegs:$src, imm:$u6)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$Rt), (shl (i32 IntRegs:$Rs), u3_0ImmPred:$u3)),
+ (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
+
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
+def: Pat<(HexagonBARRIER), (Y2_barrier)>;
+
+def: Pat<(orisadd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
+ (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>;
+
+
+// Support for generating global address.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// Map TLS addresses to A2_tfrsi.
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
+
+def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
+def: Pat<(i1 0), (PS_false)>;
+def: Pat<(i1 1), (PS_true)>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain,
+// Optional Flag and Variable Arguments.
+// Its 1 Operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def: Pat<(callseq_start timm:$amt),
+ (ADJCALLSTACKDOWN imm:$amt)>;
+def: Pat<(callseq_end timm:$amt1, timm:$amt2),
+ (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
+
+//Tail calls.
+def: Pat<(HexagonTCRet tglobaladdr:$dst),
+ (PS_tailcall_i tglobaladdr:$dst)>;
+def: Pat<(HexagonTCRet texternalsym:$dst),
+ (PS_tailcall_i texternalsym:$dst)>;
+def: Pat<(HexagonTCRet I32:$dst),
+ (PS_tailcall_r I32:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
+def: Pat<(and (i32 IntRegs:$src1), 65535),
+ (A2_zxth IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def: Pat<(and (i32 IntRegs:$src1), 255),
+ (A2_zxtb IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced;
+// if it is, it has to be mapped to a NOOP.
+def: Pat<(add (i1 PredRegs:$src1), -1),
+ (C2_not PredRegs:$src1)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def: Pat<(select (not (i1 PredRegs:$src1)), s8_0ImmPred:$src2, s32_0ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = C2_muxir(p0, r1, #i)
+def: Pat<(select (not (i1 PredRegs:$src1)), s32_0ImmPred:$src2,
+ (i32 IntRegs:$src3)),
+ (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = C2_muxri (p0, #i, r1)
+def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32_0ImmPred:$src3),
+ (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
+ (A2_sxtw (LoReg DoubleRegs:$src1))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
+ (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
+def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
+ (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
+
+// We want to prevent emitting pnot's as much as possible.
+// Map brcond with an unsupported setcc to a J2_jumpf.
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ bb:$offset),
+ (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10_0ImmPred:$src2)),
+ bb:$offset),
+ (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10_0ImmPred:$src2), bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
+def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8_0ImmPred:$src2)), bb:$offset),
+ (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8_0ImmPred:$src2)),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes; so emulate with combines.
+def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3)),
+ (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
+ (HiReg DoubleRegs:$src3)),
+ (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
+ (LoReg DoubleRegs:$src3)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
+ (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
+
+// Map for truncating a 64-bit register value to 32 bits (use the low word).
+def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
+ (LoReg DoubleRegs:$src)>;
+
+// Map for truncating a 64-bit register value to i1.
+def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
+ (C2_tfrrp (LoReg DoubleRegs:$src))>;
+
+// rs <= rt -> !(rs > rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setle (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setne (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
+ (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>;
+
+// Convert setne back to xor for hexagon since we compute w/ pred registers.
+def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>;
+
+// Map cmpne(Rss, Rtt) -> !cmpeqp(Rss, Rtt).
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
+ (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
+
+// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
+let AddedComplexity = 30 in
+def: Pat<(i1 (setge (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
+ (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32_0ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+// rs < rt -> !(rs >= rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32_0ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1,
+ (DEC_CONST_SIGNED s32_0ImmPred:$src2)))>;
+
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
+
+// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
+def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32_0ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32_0ImmPred:$src2))>;
+
+// Generate cmpgtu(Rs, #u9)
+def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32_0ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
+ (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Sign extends.
+// i1 -> i32
+def: Pat<(i32 (sext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, -1, 0)>;
+
+// i1 -> i64
+// Both words of the result must follow the predicate: true yields all ones,
+// false yields zero. Materialize -1/0 as a 32-bit value with C2_muxii and
+// sign-extend it to 64 bits, instead of forcing the high word to -1 (which
+// produced 0xFFFFFFFF00000000 for a false predicate).
+def: Pat<(i64 (sext (i1 PredRegs:$src1))),
+ (A2_sxtw (C2_muxii PredRegs:$src1, -1, 0))>;
+
+// Zero extends.
+// i1 -> i32
+def: Pat<(i32 (zext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rs = Pd to Rs = mux(Pd, #1, #0)
+def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
+def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
+ (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// Clear the sign bit in a 64-bit register.
+def ClearSign : OutPatFrag<(ops node:$Rss),
+ (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>;
+
+// High 64 bits of the unsigned 64x64-bit product of $Rss and $Rtt, assembled
+// from 32x32->64 partial products (M2_dpmpyuu_s0 / M2_dpmpyuu_acc_s0).
+// Reading the expression inside-out, with L()/H() the low/high words:
+//   t0 = (L(Rss) * L(Rtt)) >> 32
+//   t1 = t0 + H(Rss) * L(Rtt)
+//   t2 = t1 + (low word of L(Rss) * H(Rtt), with a zero high word)
+//   t3 = (t2 >> 32) + H(Rss) * H(Rtt)
+//   result = t3 + ((L(Rss) * H(Rtt)) >> 32)
+// L(Rss) * H(Rtt) is written out twice because its low and high words enter
+// the sum at different stages.
+// NOTE(review): assumes A2_addp is a plain 64-bit add -- confirm.
+def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
+ (HiReg $Rss),
+ (LoReg $Rtt)),
+ (A2_combinew (A2_tfrsi 0),
+ (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
+ 32),
+ (HiReg $Rss),
+ (HiReg $Rtt)),
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>;
+
+// Multiply 64-bit signed and use upper result.
+//
+// For two signed 64-bit integers A and B, let A' and B' denote A and B
+// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the
+// sign bit of A (and identically for B). With this notation, the signed
+// product A*B can be written as:
+// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
+// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
+// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
+// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
+
+def : Pat <(mulhs I64:$Rss, I64:$Rtt),
+ (A2_subp
+ (MulHU $Rss, $Rtt),
+ (A2_addp
+ (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
+ (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
+
+// Hexagon specific ISD nodes.
+def SDTHexagonALLOCA : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
+ [SDNPHasChain]>;
+
+
+def: Pat<(HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)),
+ (PS_alloca IntRegs:$Rs, imm:$A)>;
+
+def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
+def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
+
+def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi imm:$dst)>;
+def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi imm:$dst)>;
+
+// Arithmetic shift right by an immediate feeding an i32 add/sub/and/or:
+// select the fused S2_asr_i_r_{acc,nac,and,or} instructions.
+// NOTE(review): a brace-less `let ... in` binds only to the single def that
+// follows it, so only the `add` (_acc) pattern gets AddedComplexity = 100;
+// the _nac/_and/_or patterns keep the default complexity. The same layout
+// recurs in the shift groups that follow. Confirm this is intended.
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(sub (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(and (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(or (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(sub (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(and (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(or (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(sub (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(and (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(or (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(sub (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(and (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(or (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), u5_0ImmPred:$u5)), (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), u6_0ImmPred:$u5)), (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+let AddedComplexity = 100 in
+def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i32 IntRegs:$src1), (sra (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+let AddedComplexity = 100 in
+def: Pat<(add (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i32 IntRegs:$src1), (srl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+// Fold a 64-bit logical shift right by a register amount into the
+// add/sub/and/or/xor that consumes it (S2_lsr_r_p_{acc,nac,and,or,xor}).
+// NOTE(review): a brace-less "let ... in" applies only to the one def that
+// follows it, so only the first pattern of each group below carries
+// AddedComplexity = 100 -- confirm whether the whole group was intended.
+let AddedComplexity = 100 in
+def: Pat<(add (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+// Same folding for a 32-bit left shift by register (S2_lsl_r_r_*); there is
+// no xor form in this group.
+let AddedComplexity = 100 in
+def: Pat<(add (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i32 IntRegs:$src1), (shl (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+// Same folding for a 64-bit left shift by register (S2_lsl_r_p_*).
+let AddedComplexity = 100 in
+def: Pat<(add (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$Rs), (i32 IntRegs:$Rt))), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+// Plain shifts of a 64-bit value by a register amount.
+def: Pat<(sra (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+def: Pat<(srl (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+def: Pat<(shl (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+// NOTE(review): this shl pattern has the same source DAG as the one directly
+// above it; presumably only one of the two is ever selected -- confirm.
+def: Pat<(shl (i64 DoubleRegs:$src1), (i32 IntRegs:$src2)), (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+
+// Plain shifts of a 32-bit value by a register amount.
+def: Pat<(sra (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>;
+def: Pat<(srl (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>;
+def: Pat<(shl (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>;
+// NOTE(review): same source DAG as the S2_asl_r_r pattern directly above;
+// presumably only one of the two is ever selected -- confirm.
+def: Pat<(shl (i32 IntRegs:$src1), (i32 IntRegs:$src2)), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>;
+
+// Type profile for INSERT: one integer result and four operands. Operands
+// 1 and 2 have the result type; operands 3 and 4 are i32.
+def SDTHexagonINSERT:
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+// Type profile for INSERTRP: one integer result and three operands. Operands
+// 1 and 2 have the result type; operand 3 is an i64 (a register pair).
+def SDTHexagonINSERTRP:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i64>]>;
+
+def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
+def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
+
+// Immediate-controlled forms: the two trailing operands must satisfy
+// u5_0ImmPred for 32-bit values and u6_0ImmPred for 64-bit values.
+def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>;
+def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>;
+// Register-pair-controlled forms: the third operand is a 64-bit register.
+def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
+ (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
+def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
+
+// Four byte loads from $b+0 .. $b+3 that are merged into one word with
+// INSERT, shifts and ors are replaced by a single word load from $b
+// followed by A2_swiz. The byte at $b+3 is matched as an any-extending
+// load; the other three are zero-extending.
+let AddedComplexity = 100 in
+def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
+ (i32 (extloadi8 (add I32:$b, 3))),
+ 24, 8),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
+ (zextloadi8 I32:$b)),
+ (A2_swiz (L2_loadri_io I32:$b, 0))>;
+
+// Type profile for EXTRACTU: one integer result and three operands. Operand
+// 1 has the result type; operands 2 and 3 are i32.
+def SDTHexagonEXTRACTU:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+// Type profile for EXTRACTURP: one integer result and two operands. Operand
+// 1 has the result type; operand 2 is an i64 (a register pair).
+def SDTHexagonEXTRACTURP:
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i64>]>;
+
+def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
+def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
+
+// Immediate-controlled forms: the two trailing operands must satisfy
+// u5_0ImmPred for 32-bit values and u6_0ImmPred for 64-bit values.
+def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>;
+// Register-pair-controlled forms: the second operand is a 64-bit register.
+def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
+ (S2_extractu_rp I32:$src1, I64:$src2)>;
+def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
+ (S2_extractup_rp I64:$src1, I64:$src2)>;
+
+// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
+def: Pat<(mul (i32 IntRegs:$src1), (ineg n8_0ImmPred:$src2)),
+ (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>;
+
+// Instantiates T_MinMax_pats (defined elsewhere) for 64-bit values held in
+// DoubleRegs. Op is the comparison fragment; Inst and SwapInst are forwarded
+// unchanged to T_MinMax_pats.
+multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
+}
+
+def: Pat<(add (i64 (sext (i32 IntRegs:$Rs))), (i64 DoubleRegs:$Rt)),
+ (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>;
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
+ defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
+}
+
+// SelectionDAG nodes for HexagonISD::CALL and HexagonISD::CALLnr. Both are
+// chained and variadic, accept optional input glue and produce output glue.
+def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+
+// Map call instruction
+// A register target selects J2_callr; symbolic targets (global address,
+// external symbol, TLS global address) select J2_call.
+def : Pat<(callv3 I32:$dst),
+ (J2_callr I32:$dst)>;
+def : Pat<(callv3 tglobaladdr:$dst),
+ (J2_call tglobaladdr:$dst)>;
+def : Pat<(callv3 texternalsym:$dst),
+ (J2_call texternalsym:$dst)>;
+def : Pat<(callv3 tglobaltlsaddr:$dst),
+ (J2_call tglobaltlsaddr:$dst)>;
+
+// CALLnr maps to the PS_callr_nr / PS_call_nr pseudo instructions. There is
+// no tglobaltlsaddr form for this node here.
+def : Pat<(callv3nr I32:$dst),
+ (PS_callr_nr I32:$dst)>;
+def : Pat<(callv3nr tglobaladdr:$dst),
+ (PS_call_nr tglobaladdr:$dst)>;
+def : Pat<(callv3nr texternalsym:$dst),
+ (PS_call_nr texternalsym:$dst)>;
+
+
+def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
+def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
+
+def BITPOS32 : SDNodeXForm<imm, [{
+ // Narrow the immediate to 32 bits and let XformMskToBitPosU5Imm build the
+ // SDNode holding the bit position [0-31] that will be set.
+ const int32_t Mask = N->getSExtValue();
+ return XformMskToBitPosU5Imm(Mask, SDLoc(N));
+}]>;
+
+
+// Pats for instruction selection.
+
+// A class to embed the usual comparison patfrags within a zext to i32.
+// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same
+// names, or else the frag's "body" won't match the operands.
+class CmpInReg<PatFrag Op>
+ : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>;
+
+def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
+def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
+
+def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
+def: T_cmp32_rr_pat<C4_cmplte, setle, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
+
+def: T_cmp32_rr_pat<C4_cmplte, RevCmp<setge>, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
+
+let AddedComplexity = 100 in {
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 255), 0)),
+ (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
+ def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
+ 65535), 0)),
+ (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
+}
+
+def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32_0ImmPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>;
+
+// Preserve the S2_tstbit_r generation
+def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
+ (i32 IntRegs:$src1))), 0)))),
+ (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
+
+// The complexity of the combines involving immediates should be greater
+// than the complexity of the combine with two registers.
+let AddedComplexity = 50 in {
+def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i),
+ (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>;
+
+def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r),
+ (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>;
+}
+
+// The complexity of the combine with two immediates should be greater than
+// the complexity of a combine involving a register.
+let AddedComplexity = 75 in {
+def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6),
+ (A4_combineii imm:$s8, imm:$u6)>;
+def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8),
+ (A2_combineii imm:$s8, imm:$S8)>;
+}
+
+
+// Output fragments that widen a 32-bit register value to 64 bits.
+// Zext64 uses A4_combineir with the immediate 0 as its first operand.
+def Zext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A4_combineir 0, (i32 $Rs)))>;
+// Sext64 uses A2_sxtw.
+def Sext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A2_sxtw (i32 $Rs)))>;
+
+// Patterns to generate indexed loads with different forms of the address:
+// - frameindex,
+// - base + offset,
+// - base (without offset).
+// Load:     load fragment to match.
+// VT:       type of the loaded (and modified) value.
+// ValueMod: output fragment applied to the result of MI.
+// ImmPred:  predicate the address offset must satisfy.
+// MI:       base+offset load instruction to emit.
+multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
+ PatLeaf ImmPred, InstHexagon MI> {
+ // Frame index, implicit offset 0.
+ def: Pat<(VT (Load AddrFI:$fi)),
+ (VT (ValueMod (MI AddrFI:$fi, 0)))>;
+ // Frame index + offset.
+ def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
+ (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
+ // Register base + offset.
+ def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
+ (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
+ // Register base, implicit offset 0.
+ def: Pat<(VT (Load (i32 IntRegs:$Rs))),
+ (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
+}
+
+defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
+defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
+
+// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
+def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
+
+// Loads whose address is (Rs << #u2) + CONST32(sym) or Rs + CONST32(sym),
+// where sym is a global address, a constant-pool entry or a jump table.
+// The unshifted form passes 0 as the shift amount to MI.
+// ldOp: load fragment to match; MI: instruction to emit; VT: loaded type.
+multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
+ // Global address.
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tglobaladdr:$src2)))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src2)>;
+
+ // Constant pool.
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tconstpool:$src3)))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tconstpool:$src2)))),
+ (MI IntRegs:$src1, 0, tconstpool:$src2)>;
+
+ // Jump table.
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tjumptable:$src3)))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tjumptable:$src2)))),
+ (MI IntRegs:$src1, 0, tjumptable:$src2)>;
+}
+
+let AddedComplexity = 60 in {
+defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
+defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;
+
+defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
+defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;
+
+defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
+defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
+}
+
+// 'def pats' for load instructions with base + register offset and non-zero
+// immediate value. Immediate value is used to left-shift the second
+// register operand.
+class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2))))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
+
+let AddedComplexity = 40 in {
+ def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_pat<load, i64, L4_loadrd_rr>;
+}
+
+// 'def pats' for load instruction base + register offset and
+// zero immediate value.
+class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
+
+let AddedComplexity = 20 in {
+ def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
+}
+
+// zext i1->i64
+def: Pat<(i64 (zext (i1 PredRegs:$src1))),
+ (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// zext i32->i64
+def: Pat<(i64 (zext (i32 IntRegs:$src1))),
+ (Zext64 IntRegs:$src1)>;
+
+// Stores whose address is (Rs << #u2) + #u32, (Rs << #u2) + CONST32(global)
+// or Rs + CONST32(global). The stored value is the last operand of MI.
+// MI: store instruction; RC/VT: register class and type of the stored value;
+// stOp: store fragment to match.
+// NOTE(review): the "let" here wraps the multiclass definition rather than
+// the defm instantiations below -- confirm that the complexity reaches the
+// generated patterns as intended.
+let AddedComplexity = 40 in
+multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
+ PatFrag stOp> {
+ // Shifted index + immediate.
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl (i32 IntRegs:$src1), u2_0ImmPred:$src2),
+ u32_0ImmPred:$src3)),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>;
+
+ // Shifted index + global address.
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+
+ // Unshifted index + global address; the shift amount passed to MI is 0.
+ def : Pat<(stOp (VT RC:$src4),
+ (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
+
+defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
+defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
+defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
+defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
+
+// Store to base + (index << #u2): Rs is the base register, Rt the index
+// register and $u2 the shift amount. Value is the fragment matching the
+// stored operand $Ru, which is passed to MI as its last operand.
+class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs),
+ (i32 (shl (i32 IntRegs:$Rt), u2_0ImmPred:$u2)))),
+ (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
+
+let AddedComplexity = 40 in {
+ def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexs_pat<store, I32, S4_storeri_rr>;
+ def: Storexs_pat<store, I64, S4_storerd_rr>;
+}
+
+// Matches an immediate that is a shifted signed 30-bit value with shift 2
+// (a multiple of 4) but not a shifted signed 29-bit value with shift 3
+// (not a multiple of 8).
+def s30_2ProperPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v);
+}]>;
+// Clears the low three bits of the immediate (rounds down to a multiple
+// of 8).
+def RoundTo8 : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32);
+}]>;
+
+// 64-bit store at Rs + Off where Off is a multiple of 4 but not of 8: the
+// base is advanced by 4 with A2_addi and the offset is rounded down to a
+// multiple of 8, so base + offset is unchanged.
+let AddedComplexity = 40 in
+def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)),
+ (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>;
+
+class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
+ (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;
+
+let AddedComplexity = 20 in {
+ def: Store_rr_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Store_rr_pat<store, I32, S4_storeri_rr>;
+ def: Store_rr_pat<store, I64, S4_storerd_rr>;
+}
+
+
+def IMM_BYTE : SDNodeXForm<imm, [{
+ // -1 etc is represented as 255 etc
+ // assigning to a byte restores our desired signed value.
+ int8_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_HALF : SDNodeXForm<imm, [{
+ // -1 etc is represented as 65535 etc
+ // assigning to a short restores our desired signed value.
+ int16_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_WORD : SDNodeXForm<imm, [{
+ // -1 etc can be represented as 4294967295 etc
+ // Currently, it's not doing this. But some optimization
+ // might convert -1 to a large +ve number.
+ // assigning to a word restores our desired signed value.
+ int32_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
+def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
+def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
+
+// Emit store-immediate, but only when the stored value will not be constant-
+// extended. The reason for that is that there is no pass that can optimize
+// constant extenders in store-immediate instructions. In some cases we can
+// end up with a number of such stores, all of which store the same extended
+// value (e.g. after unrolling a loop that initializes floating point array).
+
+// Predicates to determine if the 16-bit immediate is expressible as a sign-
+// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
+// beyond 0..15, so we don't care what is in there.
+
+def i16in8ImmPred: PatLeaf<(i32 imm), [{
+ // Keep only the low 16 bits, then accept the value when it is unchanged
+ // by a round trip through a signed 8-bit integer.
+ const int16_t Half = static_cast<int16_t>(N->getSExtValue());
+ return Half == static_cast<int8_t>(Half);
+}]>;
+
+// Predicates to determine if the 32-bit immediate is expressible as a sign-
+// extended 8-bit immediate.
+def i32in8ImmPred: PatLeaf<(i32 imm), [{
+ // Keep only the low 32 bits, then accept the value when it is unchanged
+ // by a round trip through a signed 8-bit integer.
+ const int32_t Word = static_cast<int32_t>(N->getSExtValue());
+ return Word == static_cast<int8_t>(Word);
+}]>;
+
+
+let AddedComplexity = 40 in {
+ // Even though the offset is not extendable in the store-immediate, we
+ // can still generate the fi# in the base address. If the final offset
+ // is not valid for the instruction, we will replace it with a scratch
+ // register.
+// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
+// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
+// S4_storeirh_io>;
+// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;
+
+// defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
+// S4_storeirb_io>;
+// defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred,
+// ToImmHalf, S4_storeirh_io>;
+// defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
+// S4_storeiri_io>;
+
+ defm: Storexm_add_pat<truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
+ S4_storeirb_io>;
+ defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf,
+ S4_storeirh_io>;
+ defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
+ S4_storeiri_io>;
+}
+
+def: Storexm_simple_pat<truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
+def: Storexm_simple_pat<truncstorei16, s32_0ImmPred, ToImmHalf, S4_storeirh_io>;
+def: Storexm_simple_pat<store, s32_0ImmPred, ToImmWord, S4_storeiri_io>;
+
+// op(Ps, op(Pt, Pu))
+class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+// op(Ps, op(Pt, ~Pu))
+class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+def: LogLog_pat<and, and, C4_and_and>;
+def: LogLog_pat<and, or, C4_and_or>;
+def: LogLog_pat<or, and, C4_or_and>;
+def: LogLog_pat<or, or, C4_or_or>;
+
+def: LogLogNot_pat<and, and, C4_and_andn>;
+def: LogLogNot_pat<and, or, C4_and_orn>;
+def: LogLogNot_pat<or, and, C4_or_andn>;
+def: LogLogNot_pat<or, or, C4_or_orn>;
+
+//===----------------------------------------------------------------------===//
+// PIC: Support for PIC compilations. The patterns and SD nodes defined
+// below are needed to support code generation for PIC
+//===----------------------------------------------------------------------===//
+
+// AT_GOT: one i32 result and three operands; only the first two operands
+// are constrained (to i32) by this profile.
+def SDT_HexagonAtGot
+ : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+// AT_PCREL: one i32 result and one i32 operand.
+def SDT_HexagonAtPcrel
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// AT_GOT address-of-GOT, address-of-global, offset-in-global
+def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
+// AT_PCREL address-of-global
+def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
+
+// Zero offset: a single word load from the GOT base, with $addr as the
+// load's offset operand.
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
+ (L2_loadri_io I32:$got, imm:$addr)>;
+// Non-zero offset: the same load followed by A2_addi of the offset.
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
+ (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
+// PC-relative address: C4_addipc with $addr as its operand.
+def: Pat<(HexagonAtPcrel I32:$addr),
+ (C4_addipc imm:$addr)>;
+
+def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
+ (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(add (i32 IntRegs:$Rs), (add (i32 IntRegs:$Ru), s32_0ImmPred:$s6)),
+ (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+def: Pat<(add (i32 IntRegs:$src1), (sub s32_0ImmPred:$src2,
+ (i32 IntRegs:$src3))),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=sub(add(Rs,#s6),Ru)
+def: Pat<(sub (add (i32 IntRegs:$src1), s32_0ImmPred:$src2),
+ (i32 IntRegs:$src3)),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=add(sub(Rs,Ru),#s6)
+def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
+ (s32_0ImmPred:$src2)),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
+
+def: Pat<(xor (i64 DoubleRegs:$dst2),
+ (xor (i64 DoubleRegs:$Rss), (i64 DoubleRegs:$Rtt))),
+ (M4_xor_xacc DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt)>;
+def: Pat<(or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)),
+ (S4_or_andix IntRegs:$Ru, IntRegs:$_src_, imm:$s10)>;
+
+def: Pat<(or (i32 IntRegs:$src1), (and (i32 IntRegs:$Rs), s32_0ImmPred:$s10)),
+ (S4_or_andi IntRegs:$src1, IntRegs:$Rs, imm:$s10)>;
+
+def: Pat<(or (i32 IntRegs:$src1), (or (i32 IntRegs:$Rs), s32_0ImmPred:$s10)),
+ (S4_or_ori IntRegs:$src1, IntRegs:$Rs, imm:$s10)>;
+
+
+
+// Count trailing zeros: 64-bit.
+def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
+
+// Count trailing ones: 64-bit.
+def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+
+// Define leading/trailing patterns that require zero-extensions to 64 bits.
+def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
+
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
+ (S4_ntstbit_i (i32 IntRegs:$Rs), u5_0ImmPred:$u5)>;
+ def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
+ (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>;
+}
+
+// Add extra complexity to prefer these instructions over bitsset/bitsclr.
+// The reason is that tstbit/ntstbit can be folded into a compound instruction:
+// if ([!]tstbit(...)) jump ...
+let AddedComplexity = 100 in
+def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
+ (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
+
+let AddedComplexity = 100 in
+def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
+ (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
+
+// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
+// represented as a compare against "value & 0xFF", which is an exact match
+// for cmpb (same for cmph). The patterns below do not contain any additional
+// complexity that would make them preferable, and if they were actually used
+// instead of cmpb/cmph, they would result in a compare against register that
+// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
+def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)),
+ (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
+ (C4_nbitsclr I32:$Rs, I32:$Rt)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
+ (C4_nbitsset I32:$Rs, I32:$Rt)>;
+
+
+def: Pat<(add (mul (i32 IntRegs:$Rs), u6_0ImmPred:$U6), u32_0ImmPred:$u6),
+ (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
+def: Pat<(add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32_0ImmPred:$u6),
+ (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), u6_2ImmPred:$src2)),
+ (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>;
+def: Pat<(add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), u32_0ImmPred:$src2)),
+ (M4_mpyri_addr IntRegs:$src1, IntRegs:$src3, imm:$src2)>;
+
+def: Pat<(add (i32 IntRegs:$Ru), (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))),
+ (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs)>;
+
+def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
+
+// Matches Op(ShOp(Rx, #U5), #u8) -- the shifted register is the first
+// operand of Op and the immediate the second -- and emits MI with the
+// operands in the order (#u8, Rx, #U5).
+class T_Shift_CommOp_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
+ : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8),
+ (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
+
+let AddedComplexity = 200 in {
+ def : T_Shift_CommOp_pat <S4_addi_asl_ri, add, shl>;
+ def : T_Shift_CommOp_pat <S4_addi_lsr_ri, add, srl>;
+ def : T_Shift_CommOp_pat <S4_andi_asl_ri, and, shl>;
+ def : T_Shift_CommOp_pat <S4_andi_lsr_ri, and, srl>;
+}
+
+let AddedComplexity = 30 in {
+ def : T_Shift_CommOp_pat <S4_ori_asl_ri, or, shl>;
+ def : T_Shift_CommOp_pat <S4_ori_lsr_ri, or, srl>;
+}
+
+// Matches Op(#u8, ShOp(Rx, #U5)) -- the immediate is the first operand of
+// Op and the shifted register the second -- and emits MI with the operands
+// in the order (#u8, Rx, #U5).
+class T_Shift_Op_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
+ : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)),
+ (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
+
+def : T_Shift_Op_pat <S4_subi_asl_ri, sub, shl>;
+def : T_Shift_Op_pat <S4_subi_lsr_ri, sub, srl>;
+
+let AddedComplexity = 200 in {
+ def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+ def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+}
+
+def: Pat<(shl s6_0ImmPred:$s6, (i32 IntRegs:$Rt)),
+ (S4_lsli imm:$s6, IntRegs:$Rt)>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP
+//===----------------------------------------------------------------------===//
+
+// True when the immediate, truncated to a signed 8-bit value, lies in the
+// range [-31, -1].
+def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
+ int8_t v = (int8_t)N->getSExtValue();
+ return v > -32 && v <= -1;
+}]>;
+
+// True when the immediate, truncated to a signed 16-bit value, lies in the
+// range [-31, -1].
+def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
+ int16_t v = (int16_t)N->getSExtValue();
+ return v > -32 && v <= -1;
+}]>;
+
+// Complements the immediate, keeps the low 8 bits and passes the result to
+// ImmIsSingleBit (defined elsewhere; presumably true for exactly one set
+// bit -- TODO confirm).
+def Clr5Imm8Pred : PatLeaf<(i32 imm), [{
+ uint32_t v = (uint8_t)~N->getZExtValue();
+ return ImmIsSingleBit(v);
+}]>;
+
+// Same as Clr5Imm8Pred, but keeps the low 16 bits of the complement.
+def Clr5Imm16Pred : PatLeaf<(i32 imm), [{
+ uint32_t v = (uint16_t)~N->getZExtValue();
+ return ImmIsSingleBit(v);
+}]>;
+
+// Set5Imm{8,16,32}: truncate the immediate to 8/16/32 bits (unsigned) and
+// hand it to XformMskToBitPosU5Imm (defined elsewhere).
+def Set5Imm8 : SDNodeXForm<imm, [{
+ uint32_t imm = (uint8_t)N->getZExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+def Set5Imm16 : SDNodeXForm<imm, [{
+ uint32_t imm = (uint16_t)N->getZExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+def Set5Imm32 : SDNodeXForm<imm, [{
+ uint32_t imm = (uint32_t)N->getZExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+// Clr5Imm{8,16,32}: complement the immediate first, then truncate to
+// 8/16/32 bits and hand it to XformMskToBitPosU5Imm.
+def Clr5Imm8 : SDNodeXForm<imm, [{
+ uint32_t imm = (uint8_t)~N->getZExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+def Clr5Imm16 : SDNodeXForm<imm, [{
+ uint32_t imm = (uint16_t)~N->getZExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+// NOTE(review): this one uses a signed int32_t local where the other five
+// transforms use uint32_t.
+def Clr5Imm32 : SDNodeXForm<imm, [{
+ int32_t imm = (int32_t)~N->getZExtValue();
+ return XformMskToBitPosU5Imm(imm, SDLoc(N));
+}]>;
+
+// NegImm{8,16,32}: negate the immediate (after truncating it to a signed
+// 8/16-bit value for the first two) and return it as an i32 target constant.
+def NegImm8 : SDNodeXForm<imm, [{
+ int8_t V = N->getSExtValue();
+ return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32);
+}]>;
+
+def NegImm16 : SDNodeXForm<imm, [{
+ int16_t V = N->getSExtValue();
+ return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32);
+}]>;
+
+def NegImm32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+// Identity transform: returns the node unchanged.
+def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>;
+
+// Memory-operation patterns with a register operand: a load from an
+// address, Oper applied to the loaded value and register $A, and a store of
+// the result back to the same address are matched as one MI.
+// This multiclass covers addresses without an offset; MI gets offset 0.
+multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
+ InstHexagon MI> {
+ // Addr: i32
+ def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs),
+ (MI I32:$Rs, 0, I32:$A)>;
+ // Addr: fi
+ def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs),
+ (MI AddrFI:$Rs, 0, I32:$A)>;
+}
+
+// Same as Memopxr_simple_pat, for addresses of the form base + offset where
+// the offset satisfies ImmPred. Each base kind is matched with both "add"
+// and "orisadd" (defined elsewhere) as the address operator.
+multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+ SDNode Oper, InstHexagon MI> {
+ // Addr: i32
+ def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A),
+ (add I32:$Rs, ImmPred:$Off)),
+ (MI I32:$Rs, imm:$Off, I32:$A)>;
+ def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A),
+ (orisadd I32:$Rs, ImmPred:$Off)),
+ (MI I32:$Rs, imm:$Off, I32:$A)>;
+ // Addr: fi
+ def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A),
+ (add AddrFI:$Rs, ImmPred:$Off)),
+ (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
+ def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A),
+ (orisadd AddrFI:$Rs, ImmPred:$Off)),
+ (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
+}
+
+// Instantiates both the offset-less and the base + offset patterns.
+multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+ SDNode Oper, InstHexagon MI> {
+ defm: Memopxr_simple_pat <Load, Store, Oper, MI>;
+ defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+}
+
+let AddedComplexity = 180 in {
+ // add reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
+ /*anyext*/ L4_add_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
+ /*sext*/ L4_add_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
+ /*zext*/ L4_add_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
+ /*anyext*/ L4_add_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
+ /*sext*/ L4_add_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
+ /*zext*/ L4_add_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;
+
+ // sub reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
+ /*anyext*/ L4_sub_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
+ /*sext*/ L4_sub_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
+ /*zext*/ L4_sub_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
+ /*anyext*/ L4_sub_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
+ /*sext*/ L4_sub_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
+ /*zext*/ L4_sub_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
+
+ // and reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
+ /*anyext*/ L4_and_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
+ /*sext*/ L4_and_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
+ /*zext*/ L4_and_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
+ /*anyext*/ L4_and_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
+ /*sext*/ L4_and_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
+ /*zext*/ L4_and_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
+
+ // or reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
+ /*anyext*/ L4_or_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
+ /*sext*/ L4_or_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
+ /*zext*/ L4_or_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
+ /*anyext*/ L4_or_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
+ /*sext*/ L4_or_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
+ /*zext*/ L4_or_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
+}
+
+
+multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
+ PatFrag Arg, SDNodeXForm ArgMod,
+ InstHexagon MI> {
+ // Addr: i32
+ def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
+ (MI I32:$Rs, 0, (ArgMod Arg:$A))>;
+ // Addr: fi
+ def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
+ (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
+}
+
+// Load-op-store on a base+offset address.  The offset must satisfy ImmPred
+// and the same (base, offset) pair has to appear in both the load and the
+// store.  The address may be formed with either 'add' or 'orisadd'.  The
+// operand matched by Arg is run through ArgMod before it is given to MI.
+multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+                           SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
+                           InstHexagon MI> {
+  // Base is an I32 value.
+  def: Pat<(Store (Oper (Load (add I32:$Base, ImmPred:$Disp)), Arg:$Val),
+                  (add I32:$Base, ImmPred:$Disp)),
+           (MI I32:$Base, imm:$Disp, (ArgMod Arg:$Val))>;
+  def: Pat<(Store (Oper (Load (orisadd I32:$Base, ImmPred:$Disp)), Arg:$Val),
+                  (orisadd I32:$Base, ImmPred:$Disp)),
+           (MI I32:$Base, imm:$Disp, (ArgMod Arg:$Val))>;
+  // Base is an AddrFI operand.
+  def: Pat<(Store (Oper (Load (add AddrFI:$Base, ImmPred:$Disp)), Arg:$Val),
+                  (add AddrFI:$Base, ImmPred:$Disp)),
+           (MI AddrFI:$Base, imm:$Disp, (ArgMod Arg:$Val))>;
+  def: Pat<(Store (Oper (Load (orisadd AddrFI:$Base, ImmPred:$Disp)), Arg:$Val),
+                  (orisadd AddrFI:$Base, ImmPred:$Disp)),
+           (MI AddrFI:$Base, imm:$Disp, (ArgMod Arg:$Val))>;
+}
+
+// Convenience wrapper: instantiates both the offset-less patterns
+// (Memopxi_simple_pat) and the base+offset patterns (Memopxi_add_pat) for
+// one combination of load/store fragments, operation and instruction.
+multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+                       SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
+                       InstHexagon MI> {
+  defm: Memopxi_simple_pat<Load, Store, Oper, Arg, ArgMod, MI>;
+  defm: Memopxi_add_pat<Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+}
+
+
+// Memory-operation patterns with an immediate operand.  For every operation
+// the byte and halfword forms are instantiated once per extending load kind
+// (anyext, sext, zext); the word form uses the plain load/store fragments.
+let AddedComplexity = 200 in {
+ // add imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+ /*sext*/ IdImm, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+ /*zext*/ IdImm, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_iadd_memoph_io>;
+ // The sext/zext halfword entries must use the matching load fragments;
+ // using extloadi16 for all three would only ever match the anyext load.
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+ /*sext*/ IdImm, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+ /*zext*/ IdImm, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
+ L4_iadd_memopw_io>;
+ // 'sub' of an immediate accepted by the m5_0* predicates is selected as a
+ // memop add; the immediate goes through the NegImm* transform first.
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
+ /*anyext*/ NegImm8, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
+ /*sext*/ NegImm8, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
+ /*zext*/ NegImm8, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
+ /*anyext*/ NegImm16, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
+ /*sext*/ NegImm16, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
+ /*zext*/ NegImm16, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
+ L4_iadd_memopw_io>;
+
+ // sub imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_isub_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
+ /*sext*/ IdImm, L4_isub_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
+ /*zext*/ IdImm, L4_isub_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_isub_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
+ /*sext*/ IdImm, L4_isub_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
+ /*zext*/ IdImm, L4_isub_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
+ L4_isub_memopw_io>;
+ // 'add' of an immediate accepted by the m5_0* predicates is selected as a
+ // memop sub; the immediate goes through the NegImm* transform first.
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
+ /*anyext*/ NegImm8, L4_isub_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
+ /*sext*/ NegImm8, L4_isub_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
+ /*zext*/ NegImm8, L4_isub_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
+ /*anyext*/ NegImm16, L4_isub_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
+ /*sext*/ NegImm16, L4_isub_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
+ /*zext*/ NegImm16, L4_isub_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
+ L4_isub_memopw_io>;
+
+ // clrbit imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
+ /*anyext*/ Clr5Imm8, L4_iand_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
+ /*sext*/ Clr5Imm8, L4_iand_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
+ /*zext*/ Clr5Imm8, L4_iand_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
+ /*anyext*/ Clr5Imm16, L4_iand_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
+ /*sext*/ Clr5Imm16, L4_iand_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
+ /*zext*/ Clr5Imm16, L4_iand_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, and, Clr5ImmPred, Clr5Imm32,
+ L4_iand_memopw_io>;
+
+ // setbit imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
+ /*anyext*/ Set5Imm8, L4_ior_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
+ /*sext*/ Set5Imm8, L4_ior_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
+ /*zext*/ Set5Imm8, L4_ior_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
+ /*anyext*/ Set5Imm16, L4_ior_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
+ /*sext*/ Set5Imm16, L4_ior_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
+ /*zext*/ Set5Imm16, L4_ior_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, or, Set5ImmPred, Set5Imm32,
+ L4_ior_memopw_io>;
+}
+
+// Register/immediate compares that map directly onto a C4 compare
+// instruction.
+def: T_CMP_pat<C4_cmpneqi,  setne,  s32_0ImmPred>;
+def: T_CMP_pat<C4_cmpltei,  setle,  s32_0ImmPred>;
+def: T_CMP_pat<C4_cmplteui, setule, u9_0ImmPred>;
+
+// setlt(Rs, Imm) is selected as C4_cmpltei(Rs, X) where X is the immediate
+// after the DEC_CONST_SIGNED transform (Imm-1 according to the original
+// comment: "cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1)").
+def: Pat<(i1 (setlt (i32 IntRegs:$Rs), s32_0ImmPred:$Imm)),
+         (C4_cmpltei IntRegs:$Rs, (DEC_CONST_SIGNED s32_0ImmPred:$Imm))>;
+
+// Rs != Imm -> !(Rs == Imm), done in a single C4_cmpneqi.
+def: Pat<(i1 (setne (i32 IntRegs:$Rs), s32_0ImmPred:$Imm)),
+         (C4_cmpneqi IntRegs:$Rs, s32_0ImmPred:$Imm)>;
+
+// Immediate transform used by the isdigit pattern: hands the sign-extended
+// constant to XformU7ToU7M1Imm, which (per its name and the original comment)
+// produces the node for C-1.
+def DEC_CONST_BYTE : SDNodeXForm<imm, [{
+  int32_t Val = N->getSExtValue();
+  // Return the byte immediate const-1 as an SDNode.
+  return XformU7ToU7M1Imm(Val, SDLoc(N));
+}]>;
+
+// For the sequence
+// zext( setult ( and(Rs, 255), u8))
+// Use the isdigit transformation below
+
+// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)'
+// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned which allows us to eliminate a zero test after
+// biasing the expression by 48. We are depending on the representation of
+// the unsigned types, and semantics.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM
+//
+// For the C code:
+// retval = ((c>='0') & (c<='9')) ? 1 : 0;
+// The code is transformed upstream of llvm into
+// retval = (c-48) < 10 ? 1 : 0;
+// zext(setult(and(Rs, 255), C)) -> C2_muxii(A4_cmpbgtui(Rs, C'), 0, 1),
+// where C' is C after the DEC_CONST_BYTE transform.
+let AddedComplexity = 139 in
+def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$Rs), 255)),
+                                u7_0StrictPosImmPred:$C)))),
+         (C2_muxii
+            (A4_cmpbgtui IntRegs:$Rs, (DEC_CONST_BYTE u7_0StrictPosImmPred:$C)),
+            0, 1)>;
+
+// Load of type VT from an address matched by Addr, selected as MI taking
+// that address as its only operand.
+class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
+  : Pat<(VT (Load Addr:$A)),
+        (MI Addr:$A)>;
+
+// Like Loada_pat, but the value produced by MI is additionally wrapped in
+// ValueMod before it replaces the load.
+class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
+                 InstHexagon MI>
+  : Pat<(VT (Load Addr:$A)),
+        (ValueMod (MI Addr:$A))>;
+
+// Store of a value matched by Value to an address matched by Addr.  Note the
+// operand order flips: the DAG store is (value, address), MI takes
+// (address, value).
+class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
+  : Pat<(Store Value:$V, Addr:$A),
+        (MI Addr:$A, Value:$V)>;
+
+// Like Storea_pat, but the stored value is passed through ValueMod before
+// being given to MI.
+class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
+                  InstHexagon MI>
+  : Pat<(Store Value:$V, Addr:$A),
+        (MI Addr:$A, (ValueMod Value:$V))>;
+
+// Stores to an address matched by addrga, selected as the PS_storer*abs
+// instructions.  The I64 truncating stores pass the value through LoReg
+// before storing it.
+let AddedComplexity = 30 in {
+ def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
+ def: Storea_pat<store, I32, addrga, PS_storeriabs>;
+ def: Storea_pat<store, I64, addrga, PS_storerdabs>;
+
+ def: Stoream_pat<truncstorei8, I64, addrga, LoReg, PS_storerbabs>;
+ def: Stoream_pat<truncstorei16, I64, addrga, LoReg, PS_storerhabs>;
+ def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
+}
+
+// Atomic stores to an address matched by addrgp.  SwapSt wraps the atomic
+// store fragment (presumably to present its operands in the order Storea_pat
+// expects -- confirm against the SwapSt definition).
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+
+// Regular and truncating stores to an address matched by addrgp.
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+
+ // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+ // to "r0 = 1; memw(#foo) = r0"
+ // NOTE(review): the selected instruction is S2_storerbgp although the
+ // comment above talks about memw -- confirm which is intended.
+ // The nested 'let' repeats the value already set by the enclosing block.
+ let AddedComplexity = 100 in
+ def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
+}
+
+// Load (via ldOp) from a global address wrapped in HexagonCONST32, selected
+// as MI taking the global address directly.  VT defaults to i32.
+class LoadAbs_pats<PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
+  : Pat<(VT (ldOp (HexagonCONST32 tglobaladdr:$Addr))),
+        (VT (MI tglobaladdr:$Addr))>;
+
+// Loads from HexagonCONST32 global addresses.  Any-extending and
+// zero-extending loads (including the i1 one) share the unsigned load
+// instructions.
+let AddedComplexity = 30 in {
+ def: LoadAbs_pats <load, PS_loadriabs>;
+ def: LoadAbs_pats <zextloadi1, PS_loadrubabs>;
+ def: LoadAbs_pats <sextloadi8, PS_loadrbabs>;
+ def: LoadAbs_pats <extloadi8, PS_loadrubabs>;
+ def: LoadAbs_pats <zextloadi8, PS_loadrubabs>;
+ def: LoadAbs_pats <sextloadi16, PS_loadrhabs>;
+ def: LoadAbs_pats <extloadi16, PS_loadruhabs>;
+ def: LoadAbs_pats <zextloadi16, PS_loadruhabs>;
+ def: LoadAbs_pats <load, PS_loadrdabs, i64>;
+}
+
+// i1 load zero-extended to i64: unsigned byte load wrapped in Zext64.
+let AddedComplexity = 30 in
+def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (Zext64 (PS_loadrubabs tglobaladdr:$absaddr))>;
+
+// Atomic loads from an address matched by addrgp.
+def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+def: Loadam_pat<load, i1, addrga, I32toI1, PS_loadrubabs>;
+def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
+
+// Stores of an i1: the value goes through I1toI32 and is stored with a byte
+// store instruction.
+def: Stoream_pat<store, I1, addrga, I1toI32, PS_storerbabs>;
+def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
+
+// Map from load(globaladdress) -> mem[u][bhwd](#foo).
+// The global address must be wrapped in HexagonCONST32_GP; MI receives the
+// bare global address.  VT defaults to i32.
+class LoadGP_pats<PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
+  : Pat<(VT (ldOp (HexagonCONST32_GP tglobaladdr:$Addr))),
+        (VT (MI tglobaladdr:$Addr))>;
+
+// Loads from HexagonCONST32_GP global addresses.  Any-extending loads use
+// the same (unsigned) instruction as the zero-extending ones.
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi8, L2_loadrubgp>;
+ def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
+ def: LoadGP_pats <extloadi16, L2_loadruhgp>;
+ def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
+ def: LoadGP_pats <load, L2_loadrigp>;
+ def: LoadGP_pats <load, L2_loadrdgp, i64>;
+}
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following two patterns.
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi1, L2_loadrubgp>;
+ def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
+}
+
+// Transfer global address into a register
+// (global and block addresses wrapped in HexagonCONST32 / HexagonCONST32_GP
+// are materialized with A2_tfrsi).
+def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
+def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>;
+def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
+
+// Stores whose address is an immediate accepted by u32_0ImmPred.
+let AddedComplexity = 30 in {
+ def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
+ def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>;
+}
+
+// Loads whose address is an immediate accepted by u32_0ImmPred.
+let AddedComplexity = 30 in {
+ def: Loada_pat<load, i32, u32_0ImmPred, PS_loadriabs>;
+ def: Loada_pat<sextloadi8, i32, u32_0ImmPred, PS_loadrbabs>;
+ def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>;
+ def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
+ def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
+}
+
+// Indexed store word - global address.
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 100 in
+defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
+
+// Load from a global address that has only one use in the current basic block.
+// Any-extending loads use the unsigned load instructions.
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrga, PS_loadrubabs>;
+ def: Loada_pat<sextloadi8, i32, addrga, PS_loadrbabs>;
+ def: Loada_pat<zextloadi8, i32, addrga, PS_loadrubabs>;
+
+ def: Loada_pat<extloadi16, i32, addrga, PS_loadruhabs>;
+ def: Loada_pat<sextloadi16, i32, addrga, PS_loadrhabs>;
+ def: Loada_pat<zextloadi16, i32, addrga, PS_loadruhabs>;
+
+ def: Loada_pat<load, i32, addrga, PS_loadriabs>;
+ def: Loada_pat<load, i64, addrga, PS_loadrdabs>;
+}
+
+// Store to a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
+ def: Storea_pat<store, I32, addrga, PS_storeriabs>;
+ def: Storea_pat<store, I64, addrga, PS_storerdabs>;
+
+ // Truncating i64 -> i32 store: store the LoReg part of the value.
+ def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
+}
+
+// i8/i16/i32 -> i64 loads
+// We need a complexity of 120 here to override preceding handling of
+// zextload.
+// Any-extending and zero-extending loads wrap the loaded value in Zext64,
+// sign-extending loads in Sext64.
+let AddedComplexity = 120 in {
+ def: Loadam_pat<extloadi8, i64, addrga, Zext64, PS_loadrubabs>;
+ def: Loadam_pat<sextloadi8, i64, addrga, Sext64, PS_loadrbabs>;
+ def: Loadam_pat<zextloadi8, i64, addrga, Zext64, PS_loadrubabs>;
+
+ def: Loadam_pat<extloadi16, i64, addrga, Zext64, PS_loadruhabs>;
+ def: Loadam_pat<sextloadi16, i64, addrga, Sext64, PS_loadrhabs>;
+ def: Loadam_pat<zextloadi16, i64, addrga, Zext64, PS_loadruhabs>;
+
+ def: Loadam_pat<extloadi32, i64, addrga, Zext64, PS_loadriabs>;
+ def: Loadam_pat<sextloadi32, i64, addrga, Sext64, PS_loadriabs>;
+ def: Loadam_pat<zextloadi32, i64, addrga, Zext64, PS_loadriabs>;
+}
+
+// Loads from an address matched by addrgp, selected as the PS_load*abs
+// instructions.
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrgp, PS_loadrubabs>;
+ def: Loada_pat<sextloadi8, i32, addrgp, PS_loadrbabs>;
+ def: Loada_pat<zextloadi8, i32, addrgp, PS_loadrubabs>;
+
+ def: Loada_pat<extloadi16, i32, addrgp, PS_loadruhabs>;
+ def: Loada_pat<sextloadi16, i32, addrgp, PS_loadrhabs>;
+ def: Loada_pat<zextloadi16, i32, addrgp, PS_loadruhabs>;
+
+ def: Loada_pat<load, i32, addrgp, PS_loadriabs>;
+ def: Loada_pat<load, i64, addrgp, PS_loadrdabs>;
+}
+
+// Stores to an address matched by addrgp, selected as the PS_storer*abs
+// instructions.
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>;
+ def: Storea_pat<store, I32, addrgp, PS_storeriabs>;
+ def: Storea_pat<store, I64, addrgp, PS_storerdabs>;
+}
+
+// Atomic loads/stores on addrgp addresses, selected as the absolute-address
+// instructions.
+// NOTE(review): the same source patterns are also mapped to the GP-relative
+// L2_load*gp / S2_storer*gp instructions earlier in this file -- confirm
+// which set is meant to win.
+def: Loada_pat<atomic_load_8, i32, addrgp, PS_loadrubabs>;
+def: Loada_pat<atomic_load_16, i32, addrgp, PS_loadruhabs>;
+def: Loada_pat<atomic_load_32, i32, addrgp, PS_loadriabs>;
+def: Loada_pat<atomic_load_64, i64, addrgp, PS_loadrdabs>;
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, PS_storerbabs>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
+
+// Assemble an i64 from four 32-bit registers: the low 16 bits of $a at bit 0,
+// the low 16 bits of $b at bit 16, the low 16 bits of $c at bit 32, and $d
+// shifted to bit 48.  The whole or/shl tree is replaced by Insert4.
+def: Pat<(or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
+ (i32 16)),
+ (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
+ (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
+ (i32 32))),
+ (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))),
+ (Insert4 IntRegs:$a, IntRegs:$b, IntRegs:$c, IntRegs:$d)>;
+
+// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
+// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
+// We don't really want either one here.
+// The node has no results and two operands: a pointer and an integer; it
+// only carries a chain.
+def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
+def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
+ [SDNPHasChain]>;
+
+// Both forms select Y2_dcfetchbo(Rs, #u11:3): either the offset is already
+// the second operand of the node, or it is folded out of an (add Rs, #imm)
+// address whose node offset is 0.
+def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3),
+ (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
+def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)),
+ (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
+
+// Leaves matching floating-point immediates of type f32 and f64.
+def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
+def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
+
+// Transform an FP immediate into a target integer constant that carries the
+// same bit pattern; the integer type has the bit width of the FP value.
+def ftoi : SDNodeXForm<fpimm, [{
+  APInt Bits = N->getValueAPF().bitcastToAPInt();
+  MVT IntVT = MVT::getIntegerVT(Bits.getBitWidth());
+  return CurDAG->getTargetConstant(Bits.getZExtValue(), SDLoc(N), IntVT);
+}]>;
+
+
+// ((Rss >> #u6) + 1) >> 1 on 64-bit values is a single S2_asr_i_p_rnd.
+def: Pat<(sra (i64 (add (i64 (sra I64:$src1, u6_0ImmPred:$src2)), 1)), (i32 1)),
+ (S2_asr_i_p_rnd I64:$src1, imm:$src2)>;
+
+// Type profile for nodes producing an i32 from a single i64 operand.
+def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i64>]>;
+
+def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
+
+// HexagonISD::POPCOUNT of a 64-bit value maps directly to S5_popcountp.
+def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>;
+
+// Floating-point loads.  f32 values use the word load instructions and f64
+// values the doubleword ones; the groups below differ in addressing form and
+// in the AddedComplexity they are given.
+
+// Base + immediate offset.
+let AddedComplexity = 20 in {
+ defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
+}
+
+// L4_loadr*_ur forms (via T_LoadAbsReg_Pat).
+let AddedComplexity = 60 in {
+ defm : T_LoadAbsReg_Pat <load, L4_loadri_ur, f32>;
+ defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, f64>;
+}
+
+// L4_loadr*_rr forms (via Loadxs_pat).
+let AddedComplexity = 40 in {
+ def: Loadxs_pat<load, f32, L4_loadri_rr>;
+ def: Loadxs_pat<load, f64, L4_loadrd_rr>;
+}
+
+// L4_loadr*_rr forms (via Loadxs_simple_pat).
+let AddedComplexity = 20 in {
+ def: Loadxs_simple_pat<load, f32, L4_loadri_rr>;
+ def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>;
+}
+
+// Immediate or addrga address, absolute-address instructions.
+let AddedComplexity = 80 in {
+ def: Loada_pat<load, f32, u32_0ImmPred, PS_loadriabs>;
+ def: Loada_pat<load, f32, addrga, PS_loadriabs>;
+ def: Loada_pat<load, f64, addrga, PS_loadrdabs>;
+}
+
+// HexagonCONST32_GP global address, GP-relative instructions.
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <load, L2_loadrigp, f32>;
+ def: LoadGP_pats <load, L2_loadrdgp, f64>;
+}
+
+// Floating-point stores, mirroring the load patterns: F32 values use the
+// word store instructions and F64 values the doubleword ones.
+
+// Base + immediate offset.
+let AddedComplexity = 20 in {
+ defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
+}
+
+// Simple patterns should be tried with the least priority.
+def: Storex_simple_pat<store, F32, S2_storeri_io>;
+def: Storex_simple_pat<store, F64, S2_storerd_io>;
+
+// S4_storer*_ur forms (via T_StoreAbsReg_Pats).
+let AddedComplexity = 60 in {
+ defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, f32, store>;
+ defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, f64, store>;
+}
+
+// S4_storer*_rr forms (via Storexs_pat).
+let AddedComplexity = 40 in {
+ def: Storexs_pat<store, F32, S4_storeri_rr>;
+ def: Storexs_pat<store, F64, S4_storerd_rr>;
+}
+
+// S4_storer*_rr forms (via Store_rr_pat).
+let AddedComplexity = 20 in {
+ def: Store_rr_pat<store, F32, S4_storeri_rr>;
+ def: Store_rr_pat<store, F64, S4_storerd_rr>;
+}
+
+// addrga address, absolute-address instructions.
+let AddedComplexity = 80 in {
+ def: Storea_pat<store, F32, addrga, PS_storeriabs>;
+ def: Storea_pat<store, F64, addrga, PS_storerdabs>;
+}
+
+// addrgp address, GP-relative instructions.
+let AddedComplexity = 100 in {
+ def: Storea_pat<store, F32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
+}
+
+// NOTE(review): these four repeat instantiations that already appear above
+// (the Storex_pat ones without the AddedComplexity of 20) -- presumably a
+// leftover from merging several files; confirm before removing.
+defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
+def: Storex_simple_pat<store, F32, S2_storeri_io>;
+def: Storex_simple_pat<store, F64, S2_storerd_io>;
+
+// Single-precision add, subtract and multiply map one-to-one onto the
+// corresponding F2_sf* instructions.
+def: Pat<(fadd F32:$Rs, F32:$Rt), (F2_sfadd F32:$Rs, F32:$Rt)>;
+
+def: Pat<(fsub F32:$Rs, F32:$Rt), (F2_sfsub F32:$Rs, F32:$Rt)>;
+
+def: Pat<(fmul F32:$Rs, F32:$Rt), (F2_sfmpy F32:$Rs, F32:$Rt)>;
+
+// fminnum/fmaxnum are only selected when HasV5T holds.
+let Predicates = [HasV5T] in {
+  def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)),
+           (F2_sfmin F32:$Rs, F32:$Rt)>;
+  def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)),
+           (F2_sfmax F32:$Rs, F32:$Rt)>;
+}
+
+// Recognize min/max written as compare + select on the same two f32
+// operands and select F2_sfmin / F2_sfmax directly.
+let AddedComplexity = 100, Predicates = [HasV5T] in {
+  // select(Cmp(Rs, Rt), Rs, Rt): the selected values are in the same order
+  // as the compared ones.
+  class SfSel12<PatFrag Cmp, InstHexagon MI>
+    : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt),
+          (MI F32:$Rs, F32:$Rt)>;
+  // select(Cmp(Rs, Rt), Rt, Rs): the selected values are swapped.
+  class SfSel21<PatFrag Cmp, InstHexagon MI>
+    : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs),
+          (MI F32:$Rs, F32:$Rt)>;
+
+  def: SfSel12<setolt, F2_sfmin>;
+  def: SfSel12<setole, F2_sfmin>;
+  def: SfSel12<setogt, F2_sfmax>;
+  def: SfSel12<setoge, F2_sfmax>;
+  def: SfSel21<setolt, F2_sfmax>;
+  def: SfSel21<setole, F2_sfmax>;
+  def: SfSel21<setogt, F2_sfmin>;
+  def: SfSel21<setoge, F2_sfmin>;
+}
+
+// FP compare producing an i1, selected as MI applied to the same two
+// operands in the same order.  One class per operand width.
+class T_fcmp32_pat<PatFrag OpNode, InstHexagon MI>
+  : Pat<(i1 (OpNode F32:$Rs, F32:$Rt)),
+        (MI F32:$Rs, F32:$Rt)>;
+class T_fcmp64_pat<PatFrag OpNode, InstHexagon MI>
+  : Pat<(i1 (OpNode F64:$Rss, F64:$Rtt)),
+        (MI F64:$Rss, F64:$Rtt)>;
+
+// Single precision.
+def: T_fcmp32_pat<setoge, F2_sfcmpge>;
+def: T_fcmp32_pat<setuo,  F2_sfcmpuo>;
+def: T_fcmp32_pat<setoeq, F2_sfcmpeq>;
+def: T_fcmp32_pat<setogt, F2_sfcmpgt>;
+
+// Double precision.
+def: T_fcmp64_pat<setoge, F2_dfcmpge>;
+def: T_fcmp64_pat<setuo,  F2_dfcmpuo>;
+def: T_fcmp64_pat<setoeq, F2_dfcmpeq>;
+def: T_fcmp64_pat<setogt, F2_dfcmpgt>;
+
+// Defines the f32 and the f64 pattern for one compare in a single
+// instantiation; operands are passed to the instruction unchanged.
+let Predicates = [HasV5T] in
+multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+  // IntRegs
+  def: Pat<(i1 (cmpOp F32:$Rs, F32:$Rt)),
+           (IntMI F32:$Rs, F32:$Rt)>;
+  // DoubleRegs
+  def: Pat<(i1 (cmpOp F64:$Rss, F64:$Rtt)),
+           (DoubleMI F64:$Rss, F64:$Rtt)>;
+}
+
+defm: T_fcmp_pats<seteq, F2_sfcmpeq, F2_dfcmpeq>;
+defm: T_fcmp_pats<setgt, F2_sfcmpgt, F2_dfcmpgt>;
+defm: T_fcmp_pats<setge, F2_sfcmpge, F2_dfcmpge>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
+// Each compare is selected as (operands unordered) OR (ordered compare).
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass unord_Pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+  // IntRegs
+  def: Pat<(i1 (cmpOp F32:$Rs, F32:$Rt)),
+           (C2_or (F2_sfcmpuo F32:$Rs, F32:$Rt),
+                  (IntMI F32:$Rs, F32:$Rt))>;
+
+  // DoubleRegs
+  def: Pat<(i1 (cmpOp F64:$Rss, F64:$Rtt)),
+           (C2_or (F2_dfcmpuo F64:$Rss, F64:$Rtt),
+                  (DoubleMI F64:$Rss, F64:$Rtt))>;
+}
+
+defm: unord_Pats<setuge, F2_sfcmpge, F2_dfcmpge>;
+defm: unord_Pats<setugt, F2_sfcmpgt, F2_dfcmpgt>;
+defm: unord_Pats<setueq, F2_sfcmpeq, F2_dfcmpeq>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
+// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
+// (and likewise for the other compares it is instantiated with).
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordgePats<PatFrag cmpOp, InstHexagon IntMI,
+                        InstHexagon DoubleMI> {
+  // IntRegs
+  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+           (C2_not (IntMI F32:$src1, F32:$src2))>;
+  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+           (IntMI F32:$src1, F32:$src2)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+           (IntMI F32:$src1, F32:$src2)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+           (C2_not (IntMI F32:$src1, F32:$src2))>;
+
+  // DoubleRegs
+  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+           (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+           (DoubleMI F64:$src1, F64:$src2)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+           (DoubleMI F64:$src1, F64:$src2)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+           (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm: eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
+defm: eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
+defm: eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
+// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
+// The instruction passed in implements the mirrored compare, so the operands
+// are swapped when it is emitted.
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
+                         InstHexagon DoubleMI> {
+  // IntRegs
+  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+           (C2_not (IntMI F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+           (IntMI F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+           (IntMI F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+           (C2_not (IntMI F32:$src2, F32:$src1))>;
+
+  // DoubleRegs
+  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+           (DoubleMI F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+           (DoubleMI F64:$src2, F64:$src1)>;
+  // setne(cmp, 1) is the negated compare.  This must test against 1, as in
+  // the IntRegs case; testing against 0 would duplicate the pattern above
+  // with a contradictory result and leave setne(cmp, 1) unmatched.
+  def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+           (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+}
+
+defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
+
+
+// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp
+// Selected as the negated "unordered" compare.  When the second operand is
+// an FP immediate it is first materialized from its bit pattern (ftoi) with
+// A2_tfrsi for f32 or CONST64 for f64.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (seto F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)),
+           (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+  def: Pat<(i1 (seto F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)),
+           (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered lt.
+// a < b is selected as the "greater than" compare with swapped operands.
+// FP immediates are materialized from their bit pattern first.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
+           (F2_sfcmpgt F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+  def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
+           (F2_dfcmpgt F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
+}
+
+// Unordered lt.
+// Selected as (operands unordered) OR (b > a).  FP immediates are
+// materialized from their bit pattern for each use.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setult F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (F2_sfcmpgt F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+                  (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+  def: Pat<(i1 (setult F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (F2_dfcmpgt F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+                  (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered le.
+// Selected as the "greater or equal" compare with swapped operands.
+let Predicates = [HasV5T] in {
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setole F32:$src1, F32:$src2)),
+           (F2_sfcmpge F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+
+  // Rss <= Rtt -> Rtt >= Rss.
+  def: Pat<(i1 (setole F64:$src1, F64:$src2)),
+           (F2_dfcmpge F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
+}
+
+// Unordered le.
+// Selected as (operands unordered) OR (rt >= rs).
+let Predicates = [HasV5T] in {
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setule F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (F2_sfcmpge F32:$src2, F32:$src1))>;
+  def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+                  (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+  def: Pat<(i1 (setule F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (F2_dfcmpge F64:$src2, F64:$src1))>;
+  def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+                  (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered ne.
+// Selected as the negated "equal" compare.
+// NOTE(review): the negated compare presumably also yields true for
+// unordered operands, which an ordered 'ne' should not -- confirm.
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setone F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+  def: Pat<(i1 (setone F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+  def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
+  def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
+}
+
+// Unordered ne.
+// Selected as (operands unordered) OR NOT(equal).
+let Predicates = [HasV5T] in {
+  def: Pat<(i1 (setune F32:$src1, F32:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+                  (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
+  def: Pat<(i1 (setune F64:$src1, F64:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+                  (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
+  def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)),
+           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+                  (C2_not (F2_sfcmpeq F32:$src1,
+                                      (f32 (A2_tfrsi (ftoi $src2))))))>;
+  def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)),
+           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+                  (C2_not (F2_dfcmpeq F64:$src1,
+                                      (CONST64 (ftoi $src2)))))>;
+}
+
+// Besides set[o|u][comparisons], we also need set[comparisons].
+// These are selected exactly like their ordered counterparts.
+let Predicates = [HasV5T] in {
+  // lt.
+  def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
+           (F2_sfcmpgt F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+  def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
+           (F2_dfcmpgt F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
+
+  // le.
+  // rs <= rt -> rt >= rs.
+  def: Pat<(i1 (setle F32:$src1, F32:$src2)),
+           (F2_sfcmpge F32:$src2, F32:$src1)>;
+  def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)),
+           (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+
+  // Rss <= Rtt -> Rtt >= Rss.
+  def: Pat<(i1 (setle F64:$src1, F64:$src2)),
+           (F2_dfcmpge F64:$src2, F64:$src1)>;
+  def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)),
+           (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
+
+  // ne.
+  def: Pat<(i1 (setne F32:$src1, F32:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+  def: Pat<(i1 (setne F64:$src1, F64:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+  def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)),
+           (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
+  def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)),
+           (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
+}
+
+
+// Conversions between the two FP widths.
+def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>;
+def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>;
+
+// Signed integer -> FP.
+def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>;
+def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>;
+def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>;
+def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>;
+
+// Unsigned integer -> FP.
+def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>;
+def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>;
+def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>;
+def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>;
+
+// FP -> signed integer, using the "_chop" forms of the instructions.
+def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>;
+def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>;
+def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>;
+def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>;
+
+// FP -> unsigned integer, using the "_chop" forms of the instructions.
+def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>;
+def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>;
+def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>;
+def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>;
+
+// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
+// No instruction is emitted: the result of each pattern is just the source
+// operand re-typed.
+let Predicates = [HasV5T] in {
+ def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
+ def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
+ def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
+ def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
+}
+
+// Fused multiply-add.  The DAG node is fma(a, b, addend) while the
+// instructions take the addend as their first operand.
+def: Pat<(fma F32:$src2, F32:$src3, F32:$src1),
+         (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
+
+// A negated multiplicand (either one) selects F2_sffms instead.
+def: Pat<(fma (fneg F32:$src2), F32:$src3, F32:$src1),
+         (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
+
+def: Pat<(fma F32:$src2, (fneg F32:$src3), F32:$src1),
+         (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
+
+// f32 select with an FP immediate in the false arm: register/immediate mux.
+// The immediate is wrapped in ftoi before being handed to the instruction.
+def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>,
+ Requires<[HasV5T]>;
+
+// f32 select with an FP immediate in the true arm: immediate/register mux.
+def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt),
+ (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>,
+ Requires<[HasV5T]>;
+
+// f32 select between two registers.
+def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
+ (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+// select(a u< b, x, y) on f32. The predicate is computed as sfcmpgt(b, a),
+// which has the same sense as a < b, so the select arms are forwarded to
+// the mux in their original order ($src3 when true, $src4 when false),
+// matching the f64 C2_vmux form of this pattern.
+// NOTE(review): setult also holds for unordered operands; confirm that
+// matching it with sfcmpgt is acceptable for NaN inputs.
+def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
+ (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src3, F32:$src4)>,
+ Requires<[HasV5T]>;
+
+// f64 select between two register pairs uses the 64-bit vector mux.
+def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
+ (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
+ Requires<[HasV5T]>;
+
+// select(a u< b, x, y) on f64: the compare is emitted as dfcmpgt(b, a) and
+// the select arms keep their order.
+def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
+ (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
+ Requires<[HasV5T]>;
+
+// Fold the predicate negation into the mux by swapping its arms:
+// Map from p1 = not(p0); r0 = mux(p1, #i, r1)
+// => r0 = mux(p0, r1, #i)
+def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3),
+ (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>,
+ Requires<[HasV5T]>;
+
+// Map from p1 = not(p0); r0 = mux(p1, r1, #i)
+// => r0 = mux(p0, #i, r1)
+def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3),
+ (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>,
+ Requires<[HasV5T]>;
+
+// f64 -> i32 by converting to a 64-bit integer and keeping the low
+// subregister of the result.
+// NOTE(review): this file also has an (i32 (fp_to_sint F64)) pattern that
+// selects F2_conv_df2w_chop directly; confirm which one is meant to win.
+def: Pat<(i32 (fp_to_sint F64:$src1)),
+ (LoReg (F2_conv_df2d_chop F64:$src1))>,
+ Requires<[HasV5T]>;
+
+// f32 fabs: S2_clrbit_i on bit 31 (the sign bit) of the integer register
+// holding the value.
+def : Pat <(fabs (f32 IntRegs:$src1)),
+ (S2_clrbit_i (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+// f32 fneg: S2_togglebit_i on bit 31.
+def : Pat <(fneg (f32 IntRegs:$src1)),
+ (S2_togglebit_i (f32 IntRegs:$src1), 31)>,
+ Requires<[HasV5T]>;
+
+
+// Load/store fragments split on the result of isAlignedMemNode. Nodes
+// matched by `load` and `store` are memory nodes, so the predicate uses
+// cast<> (which asserts on a type mismatch) instead of passing the
+// unchecked, possibly-null result of dyn_cast<> to isAlignedMemNode.
+def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
+ return isAlignedMemNode(cast<MemSDNode>(N));
+}]>;
+
+def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{
+ return !isAlignedMemNode(cast<MemSDNode>(N));
+}]>;
+
+def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
+ return isAlignedMemNode(cast<MemSDNode>(N));
+}]>;
+
+def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
+ return !isAlignedMemNode(cast<MemSDNode>(N));
+}]>;
+
+
+// HVX single-vector store patterns for one pair of value types.
+// VTSgl: type stored from VectorRegs (patterns require UseHVXSgl).
+// VTDbl: type stored from VectorRegs128B (patterns require UseHVXDbl).
+// Aligned stores select V6_vS32b_ai*, unaligned stores V6_vS32Ub_ai*.
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned and unaligned stores to a plain register address (offset 0).
+ def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // 128B aligned and unaligned stores (offset 0).
+ def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+
+ // Fold Add R+OFF into vector store. AddedComplexity makes these
+ // preferred over the offset-0 forms above when the address is an add.
+ let AddedComplexity = 10 in {
+ def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, s4_6ImmPred:$offset)),
+ (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, s4_6ImmPred:$offset)),
+ (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // Fold Add R+OFF into vector store 128B.
+ def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, s4_7ImmPred:$offset)),
+ (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, s4_7ImmPred:$offset)),
+ (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ }
+}
+
+// Instantiate the store patterns for each element width.
+defm : vS32b_ai_pats <v64i8, v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64, v16i64>;
+
+
+// HVX single-vector load patterns for one pair of value types.
+// VTSgl: result type of the patterns that require UseHVXSgl.
+// VTDbl: result type of the patterns that require UseHVXDbl.
+// Aligned loads select V6_vL32b_ai*, unaligned loads V6_vL32Ub_ai*.
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned and unaligned loads from a plain register address (offset 0).
+ def : Pat < (VTSgl (alignedload IntRegs:$addr)),
+ (V6_vL32b_ai IntRegs:$addr, 0) >,
+ Requires<[UseHVXSgl]>;
+ def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
+ (V6_vL32Ub_ai IntRegs:$addr, 0) >,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Load
+ def : Pat < (VTDbl (alignedload IntRegs:$addr)),
+ (V6_vL32b_ai_128B IntRegs:$addr, 0) >,
+ Requires<[UseHVXDbl]>;
+ def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
+ (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >,
+ Requires<[UseHVXDbl]>;
+
+ // Fold Add R+OFF into vector load. AddedComplexity makes these
+ // preferred over the offset-0 forms above when the address is an add.
+ let AddedComplexity = 10 in {
+ def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
+ (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
+ (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ Requires<[UseHVXDbl]>;
+
+ def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
+ (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
+ (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ Requires<[UseHVXSgl]>;
+ }
+}
+
+// Instantiate the load patterns for each element width.
+defm : vL32b_ai_pats <v64i8, v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64, v16i64>;
+
+// Stores of vector register pairs (VecDblRegs / VecDblRegs128B) to a plain
+// register address, selected as the PS_vstorerw*_ai pseudo instructions
+// with offset 0. The "u" variants are used for unaligned stores.
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (PS_vstorerw_ai_128B IntRegs:$addr, 0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (PS_vstorerwu_ai_128B IntRegs:$addr, 0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
+
+// Instantiate the pair-store patterns for each element width.
+defm : STrivv_pats <v128i8, v256i8>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v16i64, v32i64>;
+
+// Loads of the vector-pair value types from a plain register address,
+// selected as the PS_vloadrw*_ai pseudo instructions with offset 0. The
+// "u" variants are used for unaligned loads.
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(VTSgl (alignedload I32:$addr)),
+ (PS_vloadrw_ai I32:$addr, 0)>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(VTSgl (unalignedload I32:$addr)),
+ (PS_vloadrwu_ai I32:$addr, 0)>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(VTDbl (alignedload I32:$addr)),
+ (PS_vloadrw_ai_128B I32:$addr, 0)>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(VTDbl (unalignedload I32:$addr)),
+ (PS_vloadrwu_ai_128B I32:$addr, 0)>,
+ Requires<[UseHVXDbl]>;
+}
+
+// Instantiate the pair-load patterns for each element width.
+defm : LDrivv_pats <v128i8, v256i8>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v16i64, v32i64>;
+
+// Scalar-predicate select of whole HVX vectors (PS_vselect) and vector
+// pairs (PS_wselect), single mode.
+let Predicates = [HasV60T,UseHVXSgl] in {
+ def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt),
+ (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>;
+ def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt),
+ (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>;
+}
+// Same selects for double (128B) mode.
+let Predicates = [HasV60T,UseHVXDbl] in {
+ def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt),
+ (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>;
+ def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt),
+ (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>;
+}
+
+
+// VCOMBINE: one result, two operands of the same type, each constrained
+// to be a sub-vector of the result type.
+def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
+ SDTCisSubVecOfVec<1, 0>]>;
+
+def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
+
+// Two v16i32 vectors combined into a v32i32 pair (single mode).
+def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
+ (v16i32 VectorRegs:$Vt))),
+ (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+// Two v32i32 vectors combined into a v64i32 pair (double mode).
+// NOTE(review): the v32i32 operands are written with VecDblRegs here, while
+// the UseHVXDbl select patterns in this file use VectorRegs128B for
+// v32i32 - confirm the intended register class.
+def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
+ (v32i32 VecDblRegs:$Vt))),
+ (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+
+// VPACK: one result, three operands; the first two operands share a type
+// and the third is an integer selector.
+def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>,
+ SDTCisInt<3>]>;
+
+def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>;
+
+// 0 as the last argument denotes vpacke. 1 denotes vpacko
+def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
+ (v64i8 VectorRegs:$Vt), (i32 0))),
+ (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
+ (v64i8 VectorRegs:$Vt), (i32 1))),
+ (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
+ (v32i16 VectorRegs:$Vt), (i32 0))),
+ (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
+ (v32i16 VectorRegs:$Vt), (i32 1))),
+ (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+
+// Double (128B) mode variants of the four patterns above.
+// NOTE(review): these use VecDblRegs for v128i8/v64i16, whereas the 128B
+// store/load patterns in this file pair those types with VectorRegs128B -
+// confirm the intended register class.
+def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
+ (v128i8 VecDblRegs:$Vt), (i32 0))),
+ (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
+ (v128i8 VecDblRegs:$Vt), (i32 1))),
+ (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
+ (v64i16 VecDblRegs:$Vt), (i32 0))),
+ (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
+ (v64i16 VecDblRegs:$Vt), (i32 1))),
+ (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+
+// Typed pattern leaves: each one pairs a short-vector value type with the
+// register class that holds it (predicate, 32-bit or 64-bit registers).
+def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
+def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
+def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
+def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
+def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
+def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
+def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
+def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
+
+
+// Bitcasts between two types that both live in IntRegs: no instruction is
+// selected, the register is simply re-typed. Both directions are defined.
+multiclass bitconvert_32<ValueType TyA, ValueType TyB> {
+ def : Pat <(TyB (bitconvert (TyA IntRegs:$Rs))), (TyB IntRegs:$Rs)>;
+ def : Pat <(TyA (bitconvert (TyB IntRegs:$Rs))), (TyA IntRegs:$Rs)>;
+}
+
+// Same as bitconvert_32, for types that live in DoubleRegs.
+multiclass bitconvert_64<ValueType TyA, ValueType TyB> {
+ def : Pat <(TyB (bitconvert (TyA DoubleRegs:$Rss))), (TyB DoubleRegs:$Rss)>;
+ def : Pat <(TyA (bitconvert (TyB DoubleRegs:$Rss))), (TyA DoubleRegs:$Rss)>;
+}
+
+// Bit convert vector types to integers (and back).
+defm : bitconvert_32<v4i8, i32>;
+defm : bitconvert_32<v2i16, i32>;
+defm : bitconvert_64<v8i8, i64>;
+defm : bitconvert_64<v4i16, i64>;
+defm : bitconvert_64<v2i32, i64>;
+
+// v4i16 shifts by an immediate accepted by u4_0ImmPred.
+def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
+ (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
+ (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
+ (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>;
+
+// v2i32 shifts by an immediate accepted by u5_0ImmPred.
+def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
+ (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
+ (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
+ (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>;
+
+// v2i16 add/sub on 32-bit registers.
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
+// Target-specific splat nodes (byte and halfword), both unary.
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8-bits from 32-bits input register into each of the
+// four bytes of 32-bits destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16-bits from 32-bits input register into each of the
+// four halfwords of 64-bits destination register.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
+
+
+// Binary vector operation: both operands are the same typed leaf, and the
+// selected instruction receives them in the same order.
+class VArith_pat <InstHexagon Inst, SDNode Node, PatFrag Vec>
+ : Pat <(Node Vec:$A, Vec:$B), (Inst Vec:$A, Vec:$B)>;
+
+// 64-bit vector add/sub.
+def: VArith_pat<A2_vaddub, add, V8I8>;
+def: VArith_pat<A2_vaddh, add, V4I16>;
+def: VArith_pat<A2_vaddw, add, V2I32>;
+def: VArith_pat<A2_vsubub, sub, V8I8>;
+def: VArith_pat<A2_vsubh, sub, V4I16>;
+def: VArith_pat<A2_vsubw, sub, V2I32>;
+
+// Bitwise operations on v2i16 reuse the scalar 32-bit instructions.
+def: VArith_pat<A2_and, and, V2I16>;
+def: VArith_pat<A2_xor, xor, V2I16>;
+def: VArith_pat<A2_or, or, V2I16>;
+
+// Bitwise operations on 64-bit vectors reuse the scalar 64-bit instructions.
+def: VArith_pat<A2_andp, and, V8I8>;
+def: VArith_pat<A2_andp, and, V4I16>;
+def: VArith_pat<A2_andp, and, V2I32>;
+def: VArith_pat<A2_orp, or, V8I8>;
+def: VArith_pat<A2_orp, or, V4I16>;
+def: VArith_pat<A2_orp, or, V2I32>;
+def: VArith_pat<A2_xorp, xor, V8I8>;
+def: VArith_pat<A2_xorp, xor, V4I16>;
+def: VArith_pat<A2_xorp, xor, V2I32>;
+
+// v2i32 shifts whose amount is a HexagonCOMBINE of the same immediate in
+// both halves: select the immediate form of the word shift.
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+ (i32 u5_0ImmPred:$c))))),
+ (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+ (i32 u5_0ImmPred:$c))))),
+ (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+ (i32 u5_0ImmPred:$c))))),
+ (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+// v4i16 shifts whose amount is a VSPLATH of an immediate: select the
+// immediate form of the halfword shift.
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+ (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+ (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+ (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+// Type profiles for the vector shift nodes: result and first operand have
+// the same fixed vector type (v2i32 or v4i16), second operand is an integer.
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+// Target-specific vector shift nodes; the W forms use the v2i32 profile,
+// the H forms the v4i16 profile.
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+// Shift nodes with an immediate amount select the "_i_" instructions.
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)),
+ (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)),
+ (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)),
+ (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)),
+ (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
+// Vector shift by an amount held in a 32-bit register: the vector operand
+// and the i32 amount are passed to the instruction unchanged.
+class vshift_rr_pat<InstHexagon Inst, SDNode ShiftNode, PatFrag Vec>
+ : Pat <(ShiftNode Vec:$Rss, I32:$Amt), (Inst Vec:$Rss, I32:$Amt)>;
+
+def: vshift_rr_pat<S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat<S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat<S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat<S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat<S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat<S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+// Type profiles for the vector compare nodes: an i1 result and two operands
+// of the same fixed vector type (v8i8, v4i16 or v2i32).
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+// Target-specific compare nodes: EQ, GT and GTU for bytes (B), halfwords
+// (H) and words (W).
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+// Target vector-compare node producing an i1: operands are forwarded to
+// the compare instruction in the same order.
+class vcmp_i1_pat<InstHexagon Inst, SDNode CmpNode, PatFrag Vec>
+ : Pat <(i1 (CmpNode Vec:$A, Vec:$B)), (Inst Vec:$A, Vec:$B)>;
+
+// Byte compares.
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+// Halfword compares.
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+// Word compares.
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+// Generic setcc on a 64-bit vector producing a vector-of-i1 result of type
+// ResTy: operands are forwarded to the compare instruction in order.
+class vcmp_vi1_pat<InstHexagon Inst, PatFrag Cmp, PatFrag Vec, ValueType ResTy>
+ : Pat <(ResTy (Cmp Vec:$A, Vec:$B)), (Inst Vec:$A, Vec:$B)>;
+
+// Word compares producing v2i1.
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+// Halfword compares producing v4i1.
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+// v2i32 multiply and multiply-accumulate are selected as the PS_vmulw /
+// PS_vmulw_acc pseudo instructions.
+def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
+ (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
+ (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+
+// Adds two v4i8: Hexagon does not have an insn for this one, so we
+// use the double add v8i8, and use only the low part of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+// Subtract two v4i8: Hexagon does not have an insn for this one, so we
+// use the double sub v8i8, and use only the low part of the result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// No 32 bit vector mux: widen both arms with Zext64, use the 64-bit vmux
+// and keep the low half of the result.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+ (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+ (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+ (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32 bit vector compare: both operands are widened with Zext64 and
+// compared with the 64-bit vector compare instructions.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+ (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
+
+
+// Select a compare by emitting InvMI with the two operands swapped.
+// InvMI: instruction implementing the reversed comparison.
+// CmpOp: comparison being matched; Value: typed operand leaf.
+// CmpTy: result type of the comparison (i1 or a vector of i1).
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+ ValueType CmpTy>
+ : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+ (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x < y --> cmp.gt(y,x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+// Unsigned variants: x u< y --> cmp.gtu(y,x).
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss) -> !vcmpweq(Rss).
+// rs != rt -> !(rs == rt).
+// The operands are v2i32 and the result is v2i1, so the equality must be
+// the word compare A2_vcmpweq (the instruction the v2i32 seteq pattern in
+// this file selects), not the byte compare A2_vcmpbeq.
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+ (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+ (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtruneh
+
+// v2i32 -> v2i16: S2_packhl is applied to the high and low words of the
+// source, and the low 32 bits of its result are used.
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+ (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
+
+
+// Target-specific vector sign-extension nodes (unary).
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+// NOTE(review): VSXTBW is selected as S2_vsxthw (the halfword-to-word
+// instruction by its name) - confirm this is the intended mapping.
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+// Generic vector extends from 32-bit to 64-bit vectors. anyext is selected
+// the same way as zext.
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32: each 32-bit half is sign-extended from
+// a byte with A2_sxtb and the halves are recombined (high word first).
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+ (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32: same scheme using A2_sxth.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+ (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiplies two v2i16 and returns a v2i32. We are using here the
+// saturating multiply, as hexagon does not provide a non saturating
+// vector multiply, and saturation does not impact the result that is
+// in double precision of the operands.
+
+// Multiplies two v2i16 vectors: as Hexagon does not have a multiply
+// with the C semantics for this one, this pattern uses the half word
+// multiply vmpyh that takes two v2i16 and returns a v2i32. This is
+// then truncated to fit this back into a v2i16 and to simulate the
+// wrap around semantics for unsigned in C.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+// The v2i32 product is passed as the second operand of S2_vtrunewh, with a
+// zero pair (A2_combineii 0, 0) as the first; only the low 32 bits of the
+// result are used.
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+ (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
+
+// Multiplies two v4i16 vectors: the high and low halves are multiplied
+// separately with vmpyh and the two products packed with S2_vtrunewh.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+ (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+ (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
+
+// Byte multiply without M5_vmpybsu: both operands are sign-extended to
+// halfwords with S2_vsxtbh, the high and low halves are multiplied with
+// vmpyh, and the two products are packed with S2_vtrunewh.
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+ (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors. The first pattern requires HasV5T and uses
+// M5_vmpybsu; the second carries no predicate and uses VMPYB_no_V5.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors: each 32-bit half is multiplied like a v4i8
+// and the halves are recombined with A2_combinew (high half first).
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+// Binary operation whose result and both operands are i64.
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+// Target-specific shuffle nodes: even/odd (E/O) bytes/halfwords (B/H).
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+// A shuffle node maps directly onto instruction MI with the two 64-bit
+// operands in the same order.
+class ShufflePat<InstHexagon MI, SDNode Op>
+ : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
+
+
+// Truncated store from v4i16 to v4i8: a truncstore whose memory type is
+// v4i8.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16: a truncstore whose memory type is
+// v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+// The value is narrowed to 32 bits in a register (same sequence as the
+// v2i32 -> v2i16 trunc pattern) and written with a word store at offset 0.
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+ (LoReg $Rs))))>;
+
+// Narrow with S2_vtrunehb, then store the 32-bit result at offset 0.
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
+
+
+// Zero and sign extended load from v2i8 into v2i16. The fragments match an
+// extending load whose memory type is v2i8.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+// v2i8 -> v2i16: halfword load at offset 0, byte-to-halfword extension,
+// then keep the low 32 bits of the 64-bit result.
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+// v2i8 -> v2i32: same sequence followed by a halfword-to-word extension.
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+ (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+ (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
+
More information about the llvm-commits
mailing list