[llvm] 8ca2fc9 - [PowerPC] Refactor PPCInstrVSX.td
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Fri May 1 17:17:59 PDT 2020
Author: Nemanja Ivanovic
Date: 2020-05-01T19:17:39-05:00
New Revision: 8ca2fc9993c9e6e1f5b1ccaba925fd19059f719a
URL: https://github.com/llvm/llvm-project/commit/8ca2fc9993c9e6e1f5b1ccaba925fd19059f719a
DIFF: https://github.com/llvm/llvm-project/commit/8ca2fc9993c9e6e1f5b1ccaba925fd19059f719a.diff
LOG: [PowerPC] Refactor PPCInstrVSX.td
Over time, we have made many additions to this file and it has frankly become a
bit of a mess. This has led to at least one issue - we have a number of
instructions where the side effects flag should be set to false and we neglected
to do this. This patch suggests a refactoring that should make the file much
more maintainable. The file is split up into major sections and the nesting
level is reduced, predicate blocks merged, etc.
Sections:
- Custom PPCISD node definitions
- Predicate definitions
- Instruction formats
- Instruction definitions
- Helper DAG definitions
- Anonymous patterns
- Instruction aliases
Differential revision: https://reviews.llvm.org/D78132
Added:
Modified:
llvm/lib/Target/PowerPC/PPCInstrVSX.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 5dd73a2e8aee..7621913780d1 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -25,6 +25,32 @@
// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
// ****************************************************************************
+// *********************************** NOTE ***********************************
+// ** When adding new anonymous patterns to this file, please add them to **
+// ** the section titled Anonymous Patterns. Chances are that the existing **
+// ** predicate blocks already contain a combination of features that you **
+// ** are after. There is a list of blocks at the top of the section. If **
+// ** you definitely need a new combination of predicates, please add that **
+// ** combination to the list. **
+// ** File Structure: **
+// ** - Custom PPCISD node definitions **
+// ** - Predicate definitions: predicates to specify the subtargets for **
+// ** which an instruction or pattern can be emitted. **
+// ** - Instruction formats: classes instantiated by the instructions. **
+// ** These generally correspond to instruction formats in section 1.6 of **
+// ** the ISA document. **
+// ** - Instruction definitions: the actual definitions of the instructions **
+// ** often including input patterns that they match. **
+// ** - Helper DAG definitions: We define a number of dag objects to use as **
+// ** input or output patterns for conciseness of the code. **
+// ** - Anonymous patterns: input patterns that an instruction matches can **
+// ** often not be specified as part of the instruction definition, so an **
+// ** anonymous pattern must be specified mapping an input pattern to an **
+// ** output pattern. These are generally guarded by subtarget predicates. **
+// ** - Instruction aliases: used to define extended mnemonics for assembly **
+// ** printing (for example: xxswapd for xxpermdi with 0x2 as the imm). **
+// ****************************************************************************
+
def PPCRegVSRCAsmOperand : AsmOperandClass {
let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
}
@@ -89,6 +115,7 @@ def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
+//--------------------------- Custom PPC nodes -------------------------------//
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
@@ -112,6 +139,21 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+//-------------------------- Predicate definitions ---------------------------//
+def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
+def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
+def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
+def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
+def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
+def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
+def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
+def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">;
+
+//--------------------- VSX-specific instruction formats ---------------------//
+// By default, all VSX instructions are to be selected over their Altivec
+// counterparts and they do not have unmodeled side effects.
+let AddedComplexity = 400, hasSideEffects = 0 in {
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
string asmstr, InstrItinClass itin, Intrinsic Int,
ValueType OutTy, ValueType InTy> {
@@ -144,14 +186,114 @@ class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let XB = XA;
}
-def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
-def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
-def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
-def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
+let Predicates = [HasVSX, HasP9Vector] in {
+class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
+
+// [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less),
+// So we use different operand class for VRB
+class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ RegisterOperand vbtype, list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
+ !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
+
+// [PO T XO B XO BX /]
+class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+ list<dag> pattern>
+ : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
+ !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
-let Predicates = [HasVSX] in {
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let hasSideEffects = 0 in { // VSX instructions don't have side effects.
+// [PO T XO B XO BX TX]
+class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
+ !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
+
+// [PO T A B XO AX BX TX], src and dest register use different operand class
+class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
+ RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
+ InstrItinClass itin, list<dag> pattern>
+ : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
+ !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+
+// [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
+
+// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
+
+// [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
+ RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
+
+// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
+class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
+
+class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
+ list<dag> pattern>
+ : Z23Form_8<opcode, xo,
+ (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
+ !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
+ let RC = ex;
+}
+
+// [PO BF // VRA VRB XO /]
+class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
+ list<dag> pattern>
+ : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
+ let Pattern = pattern;
+}
+
+// [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
+// "out" and "in" dag
+class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
+ !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
+
+// [PO S RA RB XO SX]
+class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
+ RegisterOperand vtype, list<dag> pattern>
+ : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
+ !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
+} // Predicates = HasP9Vector
+} // AddedComplexity = 400, hasSideEffects = 0
+
+//-------------------------- Instruction definitions -------------------------//
+// VSX instructions require the VSX feature, they are to be selected over
+// equivalent Altivec patterns (as they address a larger register set) and
+// they do not have unmodeled side effects.
+let Predicates = [HasVSX], AddedComplexity = 400 in {
+let hasSideEffects = 0 in {
// Load indexed instructions
let mayLoad = 1, mayStore = 0 in {
@@ -835,7 +977,7 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
[(set vsrc:$XT,
(int_ppc_vsx_xvminsp vsrc:$XA, vsrc:$XB))]>;
} // isCommutable
-} // Uses = [RM], mayRaiseFPException
+ } // Uses = [RM], mayRaiseFPException
// Logical Instructions
let isCommutable = 1 in
@@ -924,1760 +1066,279 @@ let hasSideEffects = 0 in { // VSX instructions don't have side effects.
(outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
"xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
-} // hasSideEffects
+// The following VSX instructions were introduced in Power ISA 2.07
+let Predicates = [HasVSX, HasP8Vector] in {
+ let isCommutable = 1 in {
+ def XXLEQV : XX3Form<60, 186,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
+ def XXLNAND : XX3Form<60, 178,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
+ v4i32:$XB)))]>;
+ } // isCommutable
-// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
-// instruction selection into a branch sequence.
-let PPC970_Single = 1 in {
+ let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
+ isReMaterializable = 1 in {
+ def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
+ "xxleqv $XT, $XT, $XT", IIC_VecGeneral,
+ [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>;
+ }
- def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
- (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
- "#SELECT_CC_VSRC",
- []>;
- def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
- (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
- "#SELECT_VSRC",
- [(set v2f64:$dst,
- (select i1:$cond, v2f64:$T, v2f64:$F))]>;
- def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
- (ins crrc:$cond, f8rc:$T, f8rc:$F,
- i32imm:$BROPC), "#SELECT_CC_VSFRC",
- []>;
- def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
- (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
- "#SELECT_VSFRC",
- [(set f64:$dst,
- (select i1:$cond, f64:$T, f64:$F))]>;
- def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
- (ins crrc:$cond, f4rc:$T, f4rc:$F,
- i32imm:$BROPC), "#SELECT_CC_VSSRC",
- []>;
- def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
- (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
- "#SELECT_VSSRC",
- [(set f32:$dst,
- (select i1:$cond, f32:$T, f32:$F))]>;
-}
-} // AddedComplexity
+ def XXLORC : XX3Form<60, 170,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
-def : InstAlias<"xvmovdp $XT, $XB",
- (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
-def : InstAlias<"xvmovsp $XT, $XB",
- (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+ // VSX scalar loads introduced in ISA 2.07
+ let mayLoad = 1, mayStore = 0 in {
+ let CodeSize = 3 in
+ def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
+ "lxsspx $XT, $src", IIC_LdStLFD, []>;
+ def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwax $XT, $src", IIC_LdStLFD, []>;
+ def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsiwzx $XT, $src", IIC_LdStLFD, []>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
-def : InstAlias<"xxmrghd $XT, $XA, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
-def : InstAlias<"xxmrgld $XT, $XA, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
-def : InstAlias<"xxswapd $XT, $XB",
- (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
-def : InstAlias<"xxspltd $XT, $XB, 0",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
-def : InstAlias<"xxspltd $XT, $XB, 1",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
-def : InstAlias<"xxswapd $XT, $XB",
- (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
+ // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
+ let CodeSize = 3 in
+ def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
+ "#XFLOADf32",
+ [(set f32:$XT, (load xoaddr:$src))]>;
+ // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
+ def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWAX",
+ [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
+ // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
+ def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ "#LIWZX",
+ [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
+ } // mayLoad
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+ // VSX scalar stores introduced in ISA 2.07
+ let mayStore = 1, mayLoad = 0 in {
+ let CodeSize = 3 in
+ def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
+ "stxsspx $XT, $dst", IIC_LdStSTFD, []>;
+ def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
-def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
- (v4i32 (XXLNOR $A, $A))>;
-def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
- (and v4i32:$B, v4i32:$C))),
- (v4i32 (XXSEL $A, $B, $C))>;
+ // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
+ let CodeSize = 3 in
+ def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
+ "#XFSTOREf32",
+ [(store f32:$XT, xoaddr:$dst)]>;
+ // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
+ def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
+ "#STIWX",
+ [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
+ } // mayStore
-let Predicates = [IsBigEndian] in {
-def : Pat<(v2f64 (scalar_to_vector f64:$A)),
- (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
+ // VSX Elementary Scalar FP arithmetic (SP)
+ let mayRaiseFPException = 1 in {
+ let isCommutable = 1 in {
+ def XSADDSP : XX3Form<60, 0,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>;
+ def XSMULSP : XX3Form<60, 16,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>;
+ } // isCommutable
-def : Pat<(f64 (extractelt v2f64:$S, 0)),
- (f64 (EXTRACT_SUBREG $S, sub_64))>;
-def : Pat<(f64 (extractelt v2f64:$S, 1)),
- (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-}
+ def XSSUBSP : XX3Form<60, 8,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xssubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>;
+ def XSDIVSP : XX3Form<60, 24,
+ (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+ "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
+ } // mayRaiseFPException
-let Predicates = [IsLittleEndian] in {
-def : Pat<(v2f64 (scalar_to_vector f64:$A)),
- (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
- (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
+ def XSRESP : XX2Form<60, 26,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsresp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfre f32:$XB))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
+ def XSRSP : XX2Form<60, 281,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xsrsp $XT, $XB", IIC_VecFP, []>;
+ def XSSQRTSP : XX2Form<60, 11,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xssqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set f32:$XT, (fsqrt f32:$XB))]>;
+ def XSRSQRTESP : XX2Form<60, 10,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xsrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
-def : Pat<(f64 (extractelt v2f64:$S, 0)),
- (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
-def : Pat<(f64 (extractelt v2f64:$S, 1)),
- (f64 (EXTRACT_SUBREG $S, sub_64))>;
-}
+ // FMA Instructions
+ let BaseName = "XSMADDASP" in {
+ let isCommutable = 1 in
+ def XSMADDASP : XX3Form<60, 1,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
+ def XSMADDMSP : XX3Form<60, 9,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
-// Additional fnmsub patterns: -a*b + c == -(a*b - c)
-def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
- (XSNMSUBADP $C, $A, $B)>;
-def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
- (XSNMSUBADP $C, $A, $B)>;
+ let BaseName = "XSMSUBASP" in {
+ let isCommutable = 1 in
+ def XSMSUBASP : XX3Form<60, 17,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
+ def XSMSUBMSP : XX3Form<60, 25,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
-def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
- (XVNMSUBADP $C, $A, $B)>;
-def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
- (XVNMSUBADP $C, $A, $B)>;
+ let BaseName = "XSNMADDASP" in {
+ let isCommutable = 1 in
+ def XSNMADDASP : XX3Form<60, 129,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
+ def XSNMADDMSP : XX3Form<60, 137,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
-def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
- (XVNMSUBASP $C, $A, $B)>;
-def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
- (XVNMSUBASP $C, $A, $B)>;
-
-def : Pat<(v2f64 (bitconvert v4f32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v4i32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v8i16:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2f64 (bitconvert v16i8:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-
-def : Pat<(v4f32 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2i64 (bitconvert v4f32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v4i32:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v8i16:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-def : Pat<(v2i64 (bitconvert v16i8:$A)),
- (COPY_TO_REGCLASS $A, VSRC)>;
-
-def : Pat<(v4f32 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (bitconvert v2i64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v2i64 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (bitconvert v1i128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v1i128 (bitconvert v2f64:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2i64 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v4i32 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v8i16 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-def : Pat<(v16i8 (bitconvert f128:$A)),
- (COPY_TO_REGCLASS $A, VRRC)>;
-
-def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
- (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
-def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
- (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
-
-def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
- (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
-def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
- (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
-
-def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
-def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
-
-// Loads.
-let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
- def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
- // Stores.
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
-}
-
-// Load vector big endian order
-let Predicates = [IsLittleEndian, HasVSX] in {
- def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
- def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
-}
-
-let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
- def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
- def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
- def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-}
-
-// Permutes.
-def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
-def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
-
-// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
-// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable.
-def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
-
-// Selects.
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
- (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
- (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
- (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
- (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
- (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
- (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
- (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
- (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
- (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
- (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
- (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
- (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
- (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
- (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
- (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
- (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
- (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
- (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
- (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
- (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
-// Divides.
-def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
- (XVDIVSP $A, $B)>;
-def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
- (XVDIVDP $A, $B)>;
-
-// Reciprocal estimate
-def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
- (XVRESP $A)>;
-def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
- (XVREDP $A)>;
-
-// Recip. square root estimate
-def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
- (XVRSQRTESP $A)>;
-def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
- (XVRSQRTEDP $A)>;
-
-// Vector selection
-def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
- (COPY_TO_REGCLASS
- (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
- (COPY_TO_REGCLASS $vB, VSRC),
- (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
-def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
- (COPY_TO_REGCLASS
- (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
- (COPY_TO_REGCLASS $vB, VSRC),
- (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
-def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
- (XXSEL $vC, $vB, $vA)>;
-def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
- (XXSEL $vC, $vB, $vA)>;
-
-def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
- (v4f32 (XVMAXSP $src1, $src2))>;
-def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
- (v4f32 (XVMINSP $src1, $src2))>;
-def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
- (v2f64 (XVMAXDP $src1, $src2))>;
-def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
- (v2f64 (XVMINDP $src1, $src2))>;
-
-let Predicates = [IsLittleEndian] in {
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
-} // IsLittleEndian
-
-let Predicates = [IsBigEndian] in {
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
-def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
-def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
- (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
-} // IsBigEndian
-
-} // AddedComplexity
-} // HasVSX
-
-def FpMinMax {
- dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
- (COPY_TO_REGCLASS $B, VSFRC)),
- VSSRC);
- dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
- (COPY_TO_REGCLASS $B, VSFRC)),
- VSSRC);
-}
-
-let AddedComplexity = 400, Predicates = [HasVSX] in {
- // f32 Min.
- def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Min)>;
- def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Min)>;
- // F32 Max.
- def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Max)>;
- def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
- (f32 FpMinMax.F32Max)>;
-
- // f64 Min.
- def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
- (f64 (XSMINDP $A, $B))>;
- def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
- (f64 (XSMINDP $A, $B))>;
- // f64 Max.
- def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
- (f64 (XSMAXDP $A, $B))>;
- def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
- (f64 (XSMAXDP $A, $B))>;
-}
-
-def ScalarLoads {
- dag Li8 = (i32 (extloadi8 xoaddr:$src));
- dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
- dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
- dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
- dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
-
- dag Li16 = (i32 (extloadi16 xoaddr:$src));
- dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
- dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
- dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
- dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
-
- dag Li32 = (i32 (load xoaddr:$src));
-}
-
-def DWToSPExtractConv {
- dag El0US1 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
- dag El1US1 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
- dag El0US2 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
- dag El1US2 = (f32 (PPCfcfidus
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
- dag El0SS1 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
- dag El1SS1 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
- dag El0SS2 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
- dag El1SS2 = (f32 (PPCfcfids
- (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
- dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
- dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
-}
-
-def WToDPExtractConv {
- dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
- dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
- dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
- dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
- dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
- dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
- dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
- dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
- dag BV02S = (v2f64 (build_vector El0S, El2S));
- dag BV13S = (v2f64 (build_vector El1S, El3S));
- dag BV02U = (v2f64 (build_vector El0U, El2U));
- dag BV13U = (v2f64 (build_vector El1U, El3U));
-}
-
-// The following VSX instructions were introduced in Power ISA 2.07
-/* FIXME: if the operands are v2i64, these patterns will not match.
- we should define new patterns or otherwise match the same patterns
- when the elements are larger than i32.
-*/
-def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
-def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
-def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
-let Predicates = [HasP8Vector] in {
-let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
- let isCommutable = 1 in {
- def XXLEQV : XX3Form<60, 186,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
- [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
- def XXLNAND : XX3Form<60, 178,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
- [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
- v4i32:$XB)))]>;
- } // isCommutable
-
- def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
- (XXLEQV $A, $B)>;
-
- let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
- isReMaterializable = 1 in {
- def XXLEQVOnes : XX3Form_SameOp<60, 186, (outs vsrc:$XT), (ins),
- "xxleqv $XT, $XT, $XT", IIC_VecGeneral,
- [(set v4i32:$XT, (bitconvert (v16i8 immAllOnesV)))]>;
- }
-
- def XXLORC : XX3Form<60, 170,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
- [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
-
- // VSX scalar loads introduced in ISA 2.07
- let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
- let CodeSize = 3 in
- def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
- "lxsspx $XT, $src", IIC_LdStLFD, []>;
- def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
- "lxsiwax $XT, $src", IIC_LdStLFD, []>;
- def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
- "lxsiwzx $XT, $src", IIC_LdStLFD, []>;
-
- // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
- let CodeSize = 3 in
- def XFLOADf32 : PseudoXFormMemOp<(outs vssrc:$XT), (ins memrr:$src),
- "#XFLOADf32",
- [(set f32:$XT, (load xoaddr:$src))]>;
- // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later
- def LIWAX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWAX",
- [(set f64:$XT, (PPClfiwax xoaddr:$src))]>;
- // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later
- def LIWZX : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
- "#LIWZX",
- [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
- } // mayLoad
-
- // VSX scalar stores introduced in ISA 2.07
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
- let CodeSize = 3 in
- def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
- "stxsspx $XT, $dst", IIC_LdStSTFD, []>;
- def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
- "stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
-
- // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
- let CodeSize = 3 in
- def XFSTOREf32 : PseudoXFormMemOp<(outs), (ins vssrc:$XT, memrr:$dst),
- "#XFSTOREf32",
- [(store f32:$XT, xoaddr:$dst)]>;
- // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later
- def STIWX : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
- "#STIWX",
- [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
- } // mayStore
-
- def : Pat<(f64 (extloadf32 xoaddr:$src)),
- (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
- (f32 (XFLOADf32 xoaddr:$src))>;
- def : Pat<(f64 (fpextend f32:$src)),
- (COPY_TO_REGCLASS $src, VSFRC)>;
-
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
- (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
- (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
- (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
- (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
- (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
- (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
- (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
- (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
- (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
- def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
- (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
-
- // VSX Elementary Scalar FP arithmetic (SP)
- let mayRaiseFPException = 1 in {
- let isCommutable = 1 in {
- def XSADDSP : XX3Form<60, 0,
- (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
- "xsaddsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (any_fadd f32:$XA, f32:$XB))]>;
- def XSMULSP : XX3Form<60, 16,
- (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
- "xsmulsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (any_fmul f32:$XA, f32:$XB))]>;
- } // isCommutable
- def XSSUBSP : XX3Form<60, 8,
- (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
- "xssubsp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (any_fsub f32:$XA, f32:$XB))]>;
- def XSDIVSP : XX3Form<60, 24,
- (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
- "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
- [(set f32:$XT, (any_fdiv f32:$XA, f32:$XB))]>;
- } // mayRaiseFPException
- def XSRESP : XX2Form<60, 26,
- (outs vssrc:$XT), (ins vssrc:$XB),
- "xsresp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfre f32:$XB))]>;
- def XSRSP : XX2Form<60, 281,
- (outs vssrc:$XT), (ins vsfrc:$XB),
- "xsrsp $XT, $XB", IIC_VecFP, []>;
- def XSSQRTSP : XX2Form<60, 11,
- (outs vssrc:$XT), (ins vssrc:$XB),
- "xssqrtsp $XT, $XB", IIC_FPSqrtS,
- [(set f32:$XT, (fsqrt f32:$XB))]>;
- def XSRSQRTESP : XX2Form<60, 10,
- (outs vssrc:$XT), (ins vssrc:$XB),
- "xsrsqrtesp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
-
- // FMA Instructions
- let BaseName = "XSMADDASP" in {
- let isCommutable = 1 in
- def XSMADDASP : XX3Form<60, 1,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB, f32:$XTi))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
- def XSMADDMSP : XX3Form<60, 9,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- }
-
- let BaseName = "XSMSUBASP" in {
- let isCommutable = 1 in
- def XSMSUBASP : XX3Form<60, 17,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fma f32:$XA, f32:$XB,
- (fneg f32:$XTi)))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
- def XSMSUBMSP : XX3Form<60, 25,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- }
-
- let BaseName = "XSNMADDASP" in {
- let isCommutable = 1 in
- def XSNMADDASP : XX3Form<60, 129,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsnmaddasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
- f32:$XTi)))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
- def XSNMADDMSP : XX3Form<60, 137,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- }
-
- let BaseName = "XSNMSUBASP" in {
- let isCommutable = 1 in
- def XSNMSUBASP : XX3Form<60, 145,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
- [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
- (fneg f32:$XTi))))]>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- let IsVSXFMAAlt = 1 in
- def XSNMSUBMSP : XX3Form<60, 153,
- (outs vssrc:$XT),
- (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
- "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
- RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
- AltVSXFMARel;
- }
-
- // Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
- def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
- (XSNMSUBASP $C, $A, $B)>;
- def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
- (XSNMSUBASP $C, $A, $B)>;
-
- // Single Precision Conversions (FP <-> INT)
- def XSCVSXDSP : XX2Form<60, 312,
- (outs vssrc:$XT), (ins vsfrc:$XB),
- "xscvsxdsp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfcfids f64:$XB))]>;
- def XSCVUXDSP : XX2Form<60, 296,
- (outs vssrc:$XT), (ins vsfrc:$XB),
- "xscvuxdsp $XT, $XB", IIC_VecFP,
- [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
-
- // Conversions between vector and scalar single precision
- def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
- "xscvdpspn $XT, $XB", IIC_VecFP, []>;
- def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
- "xscvspdpn $XT, $XB", IIC_VecFP, []>;
-
- let Predicates = [IsLittleEndian] in {
- def : Pat<DWToSPExtractConv.El0SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El0US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS
- (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
- }
-
- let Predicates = [IsBigEndian] in {
- def : Pat<DWToSPExtractConv.El0SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1SS1,
- (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- def : Pat<DWToSPExtractConv.El0US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
- def : Pat<DWToSPExtractConv.El1US1,
- (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- }
-
- // Instructions for converting float to i64 feeding a store.
- let Predicates = [NoP9Vector] in {
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
- }
-
- // Instructions for converting float to i32 feeding a store.
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
- (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
- (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
-
- def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
- def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
- (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
- (COPY_TO_REGCLASS $src2, VRRC)))>;
-} // AddedComplexity = 400
-} // HasP8Vector
-
-let AddedComplexity = 400 in {
-let Predicates = [HasDirectMove] in {
- // VSX direct move instructions
- def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
- "mfvsrd $rA, $XT", IIC_VecGeneral,
- [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
- Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
- def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
- "mfvsrd $rA, $XT", IIC_VecGeneral,
- []>,
- Requires<[In64BitMode]>;
- def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
- "mfvsrwz $rA, $XT", IIC_VecGeneral,
- [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
- let isCodeGenOnly = 1 in
- def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
- "mfvsrwz $rA, $XT", IIC_VecGeneral,
- []>;
- def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
- "mtvsrd $XT, $rA", IIC_VecGeneral,
- [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
- Requires<[In64BitMode]>;
- let isCodeGenOnly = 1 in
- def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
- "mtvsrd $XT, $rA", IIC_VecGeneral,
- []>,
- Requires<[In64BitMode]>;
- def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
- "mtvsrwa $XT, $rA", IIC_VecGeneral,
- [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
- let isCodeGenOnly = 1 in
- def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrwa $XT, $rA", IIC_VecGeneral,
- []>;
- def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
- "mtvsrwz $XT, $rA", IIC_VecGeneral,
- [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
- let isCodeGenOnly = 1 in
- def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrwz $XT, $rA", IIC_VecGeneral,
- []>;
-} // HasDirectMove
-
-let Predicates = [IsISA3_0, HasDirectMove] in {
- def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
-
- def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
- "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
- []>, Requires<[In64BitMode]>;
-
- def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
- "mfvsrld $rA, $XT", IIC_VecGeneral,
- []>, Requires<[In64BitMode]>;
-
-} // IsISA3_0, HasDirectMove
-} // AddedComplexity = 400
-
-// We want to parse this from asm, but we don't want to emit this as it would
-// be emitted with a VSX reg. So leave Emit = 0 here.
-def : InstAlias<"mfvrd $rA, $XT",
- (MFVRD g8rc:$rA, vrrc:$XT), 0>;
-def : InstAlias<"mffprd $rA, $src",
- (MFVSRD g8rc:$rA, f8rc:$src)>;
-def : InstAlias<"mtvrd $XT, $rA",
- (MTVRD vrrc:$XT, g8rc:$rA), 0>;
-def : InstAlias<"mtfprd $dst, $rA",
- (MTVSRD f8rc:$dst, g8rc:$rA)>;
-def : InstAlias<"mfvrwz $rA, $XT",
- (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
-def : InstAlias<"mffprwz $rA, $src",
- (MFVSRWZ gprc:$rA, f8rc:$src)>;
-def : InstAlias<"mtvrwa $XT, $rA",
- (MTVRWA vrrc:$XT, gprc:$rA), 0>;
-def : InstAlias<"mtfprwa $dst, $rA",
- (MTVSRWA f8rc:$dst, gprc:$rA)>;
-def : InstAlias<"mtvrwz $XT, $rA",
- (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
-def : InstAlias<"mtfprwz $dst, $rA",
- (MTVSRWZ f8rc:$dst, gprc:$rA)>;
-
-/* Direct moves of various widths from GPR's into VSR's. Each move lines
- the value up into element 0 (both BE and LE). Namely, entities smaller than
- a doubleword are shifted left and moved for BE. For LE, they're moved, then
- swapped to go into the least significant element of the VSR.
-*/
-def MovesToVSR {
- dag BE_BYTE_0 =
- (MTVSRD
- (RLDICR
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
- dag BE_HALF_0 =
- (MTVSRD
- (RLDICR
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
- dag BE_WORD_0 =
- (MTVSRD
- (RLDICR
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
- dag BE_DWORD_0 = (MTVSRD $A);
-
- dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
- dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
- LE_MTVSRW, sub_64));
- dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
- dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
- BE_DWORD_0, sub_64));
- dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
-}
-
-/* Patterns for extracting elements out of vectors. Integer elements are
- extracted using direct move operations. Patterns for extracting elements
- whose indices are not available at compile time are also provided with
- various _VARIABLE_ patterns.
- The numbering for the DAG's is for LE, but when used on BE, the correct
- LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
-*/
-def VectorExtractions {
- // Doubleword extraction
- dag LE_DWORD_0 =
- (MFVSRD
- (EXTRACT_SUBREG
- (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
- (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
- dag LE_DWORD_1 = (MFVSRD
- (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
-
- // Word extraction
- dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
- dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
- dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
- dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
-
- // Halfword extraction
- dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
- dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
- dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
- dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
- dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
- dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
- dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
- dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
-
- // Byte extraction
- dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
- dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
- dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
- dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
- dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
- dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
- dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
- dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
- dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
- dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
- dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
- dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
- dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
- dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
- dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
- dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));
-
- /* Variable element number (BE and LE patterns must be specified separately)
- This is a rather involved process.
-
- Conceptually, this is how the move is accomplished:
- 1. Identify which doubleword contains the element
- 2. Shift in the VMX register so that the correct doubleword is correctly
- lined up for the MFVSRD
- 3. Perform the move so that the element (along with some extra stuff)
- is in the GPR
- 4. Right shift within the GPR so that the element is right-justified
-
- Of course, the index is an element number which has a different meaning
- on LE/BE so the patterns have to be specified separately.
-
- Note: The final result will be the element right-justified with high
- order bits being arbitrarily defined (namely, whatever was in the
- vector register to the left of the value originally).
- */
-
- /* LE variable byte
- Number 1. above:
- - For elements 0-7, we shift left by 8 bytes since they're on the right
- - For elements 8-15, we need not shift (shift left by zero bytes)
- This is accomplished by inverting the bits of the index and AND-ing
- with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
- */
- dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)));
-
- // Number 2. above:
- // - Now that we set up the shift amount, we shift in the VMX register
- dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC));
-
- // Number 3. above:
- // - The doubleword containing our element is moved to a GPR
- dag LE_MV_VBYTE = (MFVSRD
- (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
- sub_64));
-
- /* Number 4. above:
- - Truncate the element number to the range 0-7 (8-15 are symmetrical
- and out of range values are truncated accordingly)
- - Multiply by 8 as we need to shift right by the number of bits, not bytes
- - Shift right in the GPR by the calculated value
- */
- dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60),
- sub_32);
- dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT),
- sub_32);
-
- /* LE variable halfword
- Number 1. above:
- - For elements 0-3, we shift left by 8 since they're on the right
- - For elements 4-7, we need not shift (shift left by zero bytes)
- Similarly to the byte pattern, we invert the bits of the index, but we
- AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
- Of course, the shift is still by 8 bytes, so we must multiply by 2.
- */
- dag LE_VHALF_PERM_VEC =
- (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)));
-
- // Number 2. above:
- // - Now that we set up the shift amount, we shift in the VMX register
- dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC));
-
- // Number 3. above:
- // - The doubleword containing our element is moved to a GPR
- dag LE_MV_VHALF = (MFVSRD
- (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
- sub_64));
-
- /* Number 4. above:
- - Truncate the element number to the range 0-3 (4-7 are symmetrical
- and out of range values are truncated accordingly)
- - Multiply by 16 as we need to shift right by the number of bits
- - Shift right in the GPR by the calculated value
- */
- dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59),
- sub_32);
- dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT),
- sub_32);
-
- /* LE variable word
- Number 1. above:
- - For elements 0-1, we shift left by 8 since they're on the right
- - For elements 2-3, we need not shift
- */
- dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)));
-
- // Number 2. above:
- // - Now that we set up the shift amount, we shift in the VMX register
- dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC));
-
- // Number 3. above:
- // - The doubleword containing our element is moved to a GPR
- dag LE_MV_VWORD = (MFVSRD
- (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)),
- sub_64));
-
- /* Number 4. above:
- - Truncate the element number to the range 0-1 (2-3 are symmetrical
- and out of range values are truncated accordingly)
- - Multiply by 32 as we need to shift right by the number of bits
- - Shift right in the GPR by the calculated value
- */
- dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58),
- sub_32);
- dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT),
- sub_32);
-
- /* LE variable doubleword
- Number 1. above:
- - For element 0, we shift left by 8 since it's on the right
- - For element 1, we need not shift
- */
- dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)));
-
- // Number 2. above:
- // - Now that we set up the shift amount, we shift in the VMX register
- dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC));
-
- // Number 3. above:
- // - The doubleword containing our element is moved to a GPR
- // - Number 4. is not needed for the doubleword as the value is 64-bits
- dag LE_VARIABLE_DWORD =
- (MFVSRD (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)),
- sub_64));
-
- /* LE variable float
- - Shift the vector to line up the desired element to BE Word 0
- - Convert 32-bit float to a 64-bit single precision float
- */
- dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)));
- dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
- dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);
-
- /* LE variable double
- Same as the LE doubleword except there is no move.
- */
- dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
- (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
- LE_VDWORD_PERM_VEC));
- dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);
-
- /* BE variable byte
- The algorithm here is the same as the LE variable byte except:
- - The shift in the VMX register is by 0/8 for opposite element numbers so
- we simply AND the element number with 0x8
- - The order of elements after the move to GPR is reversed, so we invert
- the bits of the index prior to truncating to the range 0-7
- */
- dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8)));
- dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC));
- dag BE_MV_VBYTE = (MFVSRD
- (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
- sub_64));
- dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60),
- sub_32);
- dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT),
- sub_32);
-
- /* BE variable halfword
- The algorithm here is the same as the LE variable halfword except:
- - The shift in the VMX register is by 0/8 for opposite element numbers so
- we simply AND the element number with 0x4 and multiply by 2
- - The order of elements after the move to GPR is reversed, so we invert
- the bits of the index prior to truncating to the range 0-3
- */
- dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDI8_rec $Idx, 4), 1, 62)));
- dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC));
- dag BE_MV_VHALF = (MFVSRD
- (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
- sub_64));
- dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59),
- sub_32);
- dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT),
- sub_32);
-
- /* BE variable word
- The algorithm is the same as the LE variable word except:
- - The shift in the VMX register happens for opposite element numbers
- - The order of elements after the move to GPR is reversed, so we invert
- the bits of the index prior to truncating to the range 0-1
- */
- dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDI8_rec $Idx, 2), 2, 61)));
- dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC));
- dag BE_MV_VWORD = (MFVSRD
- (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
- sub_64));
- dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58),
- sub_32);
- dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT),
- sub_32);
-
- /* BE variable doubleword
- Same as the LE doubleword except we shift in the VMX register for opposite
- element indices.
- */
- dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
- (RLDICR (ANDI8_rec $Idx, 1), 3, 60)));
- dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC));
- dag BE_VARIABLE_DWORD =
- (MFVSRD (EXTRACT_SUBREG
- (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
- sub_64));
-
- /* BE variable float
- - Shift the vector to line up the desired element to BE Word 0
- - Convert 32-bit float to a 64-bit single precision float
- */
- dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61)));
- dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
- dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);
-
- /* BE variable double
- Same as the BE doubleword except there is no move.
- */
- dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
- (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
- BE_VDWORD_PERM_VEC));
- dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
-}
-
-def NoP9Altivec : Predicate<"!PPCSubTarget->hasP9Altivec()">;
-let AddedComplexity = 400 in {
-// v4f32 scalar <-> vector conversions (BE)
-let Predicates = [IsBigEndian, HasP8Vector] in {
- def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (v4f32 (XSCVDPSPN $A))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 0)),
- (f32 (XSCVSPDPN $S))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 1)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 2)),
- (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 3)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
- (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
-} // IsBigEndian, HasP8Vector
-
-// Variable index vector_extract for v2f64 does not require P8Vector
-let Predicates = [IsBigEndian, HasVSX] in
- def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
- (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
-
-let Predicates = [IsBigEndian, HasDirectMove] in {
- // v16i8 scalar <-> vector conversions (BE)
- def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
- def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
- def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
-
- // v2i64 scalar <-> vector conversions (BE)
- def : Pat<(i64 (vector_extract v2i64:$S, 0)),
- (i64 VectorExtractions.LE_DWORD_1)>;
- def : Pat<(i64 (vector_extract v2i64:$S, 1)),
- (i64 VectorExtractions.LE_DWORD_0)>;
- def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
- (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
-} // IsBigEndian, HasDirectMove
-
-let Predicates = [IsBigEndian, HasDirectMove, NoP9Altivec] in {
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 VectorExtractions.LE_BYTE_15)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 VectorExtractions.LE_BYTE_14)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 VectorExtractions.LE_BYTE_13)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 VectorExtractions.LE_BYTE_12)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 VectorExtractions.LE_BYTE_11)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 VectorExtractions.LE_BYTE_10)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 VectorExtractions.LE_BYTE_9)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 VectorExtractions.LE_BYTE_8)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 VectorExtractions.LE_BYTE_7)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 VectorExtractions.LE_BYTE_6)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 VectorExtractions.LE_BYTE_5)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 VectorExtractions.LE_BYTE_4)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 VectorExtractions.LE_BYTE_3)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 VectorExtractions.LE_BYTE_2)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 VectorExtractions.LE_BYTE_1)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 VectorExtractions.LE_BYTE_0)>;
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
-
- // v8i16 scalar <-> vector conversions (BE)
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 VectorExtractions.LE_HALF_7)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 VectorExtractions.LE_HALF_6)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 VectorExtractions.LE_HALF_5)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 VectorExtractions.LE_HALF_4)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 VectorExtractions.LE_HALF_3)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 VectorExtractions.LE_HALF_2)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 VectorExtractions.LE_HALF_1)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 VectorExtractions.LE_HALF_0)>;
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_HALF)>;
-
- // v4i32 scalar <-> vector conversions (BE)
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 VectorExtractions.LE_WORD_3)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_1)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 VectorExtractions.LE_WORD_0)>;
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 VectorExtractions.BE_VARIABLE_WORD)>;
-} // IsBigEndian, HasDirectMove, NoP9Altivec
-
-// v4f32 scalar <-> vector conversions (LE)
-let Predicates = [IsLittleEndian, HasP8Vector] in {
- def : Pat<(v4f32 (scalar_to_vector f32:$A)),
- (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 0)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 1)),
- (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 2)),
- (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
- def : Pat<(f32 (vector_extract v4f32:$S, 3)),
- (f32 (XSCVSPDPN $S))>;
- def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
- (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
-} // IsLittleEndian, HasP8Vector
-
-// Variable index vector_extract for v2f64 does not require P8Vector
-let Predicates = [IsLittleEndian, HasVSX] in
- def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
- (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
-
-def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
-def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
-// Variable index unsigned vector_extract on Power9
-let Predicates = [HasP9Altivec, IsLittleEndian] in {
- def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
- (VEXTUBRX $Idx, $S)>;
-
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
- (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
- (VEXTUHRX (LI8 0), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
- (VEXTUHRX (LI8 2), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
- (VEXTUHRX (LI8 4), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
- (VEXTUHRX (LI8 6), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
- (VEXTUHRX (LI8 8), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
- (VEXTUHRX (LI8 10), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
- (VEXTUHRX (LI8 12), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
- (VEXTUHRX (LI8 14), $S)>;
-
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
- (VEXTUWRX (LI8 0), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
- (VEXTUWRX (LI8 4), $S)>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
- (VEXTUWRX (LI8 12), $S)>;
-
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
- (EXTSW (VEXTUWRX (LI8 0), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
- (EXTSW (VEXTUWRX (LI8 4), $S))>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
- (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
- (EXTSW (VEXTUWRX (LI8 12), $S))>;
-
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX
- (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX
- (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
- // For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
-}
-
-let Predicates = [HasP9Altivec, IsBigEndian] in {
- def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
- (VEXTUBLX $Idx, $S)>;
-
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
- (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
- (VEXTUHLX (LI8 0), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
- (VEXTUHLX (LI8 2), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
- (VEXTUHLX (LI8 4), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
- (VEXTUHLX (LI8 6), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
- (VEXTUHLX (LI8 8), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
- (VEXTUHLX (LI8 10), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
- (VEXTUHLX (LI8 12), $S)>;
- def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
- (VEXTUHLX (LI8 14), $S)>;
-
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
- (VEXTUWLX (LI8 0), $S)>;
-
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
- (VEXTUWLX (LI8 8), $S)>;
- def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
- (VEXTUWLX (LI8 12), $S)>;
-
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
- (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
- (EXTSW (VEXTUWLX (LI8 0), $S))>;
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
- (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 VectorExtractions.LE_WORD_2), sub_32))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
- (EXTSW (VEXTUWLX (LI8 8), $S))>;
- def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
- (EXTSW (VEXTUWLX (LI8 12), $S))>;
-
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX
- (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
-
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX
- (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
- // For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
-}
-
-let Predicates = [IsLittleEndian, HasDirectMove] in {
- // v16i8 scalar <-> vector conversions (LE)
- def : Pat<(v16i8 (scalar_to_vector i32:$A)),
- (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
- def : Pat<(v8i16 (scalar_to_vector i32:$A)),
- (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 MovesToVSR.LE_WORD_0)>;
- def : Pat<(v2i64 (scalar_to_vector i64:$A)),
- (v2i64 MovesToVSR.LE_DWORD_0)>;
- // v2i64 scalar <-> vector conversions (LE)
- def : Pat<(i64 (vector_extract v2i64:$S, 0)),
- (i64 VectorExtractions.LE_DWORD_0)>;
- def : Pat<(i64 (vector_extract v2i64:$S, 1)),
- (i64 VectorExtractions.LE_DWORD_1)>;
- def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)),
- (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
-} // IsLittleEndian, HasDirectMove
-
-let Predicates = [IsLittleEndian, HasDirectMove, NoP9Altivec] in {
- def : Pat<(i32 (vector_extract v16i8:$S, 0)),
- (i32 VectorExtractions.LE_BYTE_0)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 1)),
- (i32 VectorExtractions.LE_BYTE_1)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 2)),
- (i32 VectorExtractions.LE_BYTE_2)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 3)),
- (i32 VectorExtractions.LE_BYTE_3)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 4)),
- (i32 VectorExtractions.LE_BYTE_4)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 5)),
- (i32 VectorExtractions.LE_BYTE_5)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 6)),
- (i32 VectorExtractions.LE_BYTE_6)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 7)),
- (i32 VectorExtractions.LE_BYTE_7)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 8)),
- (i32 VectorExtractions.LE_BYTE_8)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 9)),
- (i32 VectorExtractions.LE_BYTE_9)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 10)),
- (i32 VectorExtractions.LE_BYTE_10)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 11)),
- (i32 VectorExtractions.LE_BYTE_11)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 12)),
- (i32 VectorExtractions.LE_BYTE_12)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 13)),
- (i32 VectorExtractions.LE_BYTE_13)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 14)),
- (i32 VectorExtractions.LE_BYTE_14)>;
- def : Pat<(i32 (vector_extract v16i8:$S, 15)),
- (i32 VectorExtractions.LE_BYTE_15)>;
- def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
-
- // v8i16 scalar <-> vector conversions (LE)
- def : Pat<(i32 (vector_extract v8i16:$S, 0)),
- (i32 VectorExtractions.LE_HALF_0)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 1)),
- (i32 VectorExtractions.LE_HALF_1)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 2)),
- (i32 VectorExtractions.LE_HALF_2)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 3)),
- (i32 VectorExtractions.LE_HALF_3)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 4)),
- (i32 VectorExtractions.LE_HALF_4)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 5)),
- (i32 VectorExtractions.LE_HALF_5)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 6)),
- (i32 VectorExtractions.LE_HALF_6)>;
- def : Pat<(i32 (vector_extract v8i16:$S, 7)),
- (i32 VectorExtractions.LE_HALF_7)>;
- def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_HALF)>;
-
- // v4i32 scalar <-> vector conversions (LE)
- def : Pat<(i32 (vector_extract v4i32:$S, 0)),
- (i32 VectorExtractions.LE_WORD_0)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 1)),
- (i32 VectorExtractions.LE_WORD_1)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 2)),
- (i32 VectorExtractions.LE_WORD_2)>;
- def : Pat<(i32 (vector_extract v4i32:$S, 3)),
- (i32 VectorExtractions.LE_WORD_3)>;
- def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
- (i32 VectorExtractions.LE_VARIABLE_WORD)>;
-} // IsLittleEndian, HasDirectMove, NoP9Altivec
-
-let Predicates = [HasDirectMove, HasVSX] in {
-// bitconvert f32 -> i32
-// (convert to 32-bit fp single, shift right 1 word, move to GPR)
-def : Pat<(i32 (bitconvert f32:$S)),
- (i32 (MFVSRWZ (EXTRACT_SUBREG
- (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
- sub_64)))>;
-// bitconvert i32 -> f32
-// (move to FPR, shift left 1 word, convert to 64-bit fp single)
-def : Pat<(f32 (bitconvert i32:$A)),
- (f32 (XSCVSPDPN
- (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
-
-// bitconvert f64 -> i64
-// (move to GPR, nothing else needed)
-def : Pat<(i64 (bitconvert f64:$S)),
- (i64 (MFVSRD $S))>;
-
-// bitconvert i64 -> f64
-// (move to FPR, nothing else needed)
-def : Pat<(f64 (bitconvert i64:$S)),
- (f64 (MTVSRD $S))>;
-
-// Rounding to integer.
-def : Pat<(i64 (lrint f64:$S)),
- (i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (lrint f32:$S)),
- (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (llrint f64:$S)),
- (i64 (MFVSRD (FCTID $S)))>;
-def : Pat<(i64 (llrint f32:$S)),
- (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
-def : Pat<(i64 (lround f64:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (lround f32:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-def : Pat<(i64 (llround f64:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
-def : Pat<(i64 (llround f32:$S)),
- (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
-}
+ let BaseName = "XSNMSUBASP" in {
+ let isCommutable = 1 in
+ def XSNMSUBASP : XX3Form<60, 145,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set f32:$XT, (fneg (fma f32:$XA, f32:$XB,
+ (fneg f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let IsVSXFMAAlt = 1, hasSideEffects = 1 in
+ def XSNMSUBMSP : XX3Form<60, 153,
+ (outs vssrc:$XT),
+ (ins vssrc:$XTi, vssrc:$XA, vssrc:$XB),
+ "xsnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
-let Predicates = [HasVSX] in {
-// Rounding for single precision.
-def : Pat<(f32 (fround f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPI
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fnearbyint f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIC
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (ffloor f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIM
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fceil f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIP
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (ftrunc f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIZ
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (frint f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIC
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
+ // Single Precision Conversions (FP <-> INT)
+ def XSCVSXDSP : XX2Form<60, 312,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvsxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+ def XSCVUXDSP : XX2Form<60, 296,
+ (outs vssrc:$XT), (ins vsfrc:$XB),
+ "xscvuxdsp $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
-// Rounding for double precision.
-def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
-def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
-}
+ // Conversions between vector and scalar single precision
+ def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
+ "xscvdpspn $XT, $XB", IIC_VecFP, []>;
+ def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
+ "xscvspdpn $XT, $XB", IIC_VecFP, []>;
-// Materialize a zero-vector of long long
-def : Pat<(v2i64 immAllZerosV),
- (v2i64 (XXLXORz))>;
-}
+ let Predicates = [HasVSX, HasDirectMove] in {
+ // VSX direct move instructions
+ def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ Requires<[In64BitMode]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
+ def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ []>,
+ Requires<[In64BitMode]>;
+ def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
+ def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ []>;
+ def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ Requires<[In64BitMode]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
+ def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ []>,
+ Requires<[In64BitMode]>;
+ def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
+ def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ []>;
+ def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let isCodeGenOnly = 1, hasSideEffects = 1 in
+ def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ []>;
+ } // HasDirectMove
-def AlignValues {
- dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
- dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
-}
+} // HasVSX, HasP8Vector
-// The following VSX instructions were introduced in Power ISA 3.0
-def HasP9Vector : Predicate<"PPCSubTarget->hasP9Vector()">;
-let AddedComplexity = 400, Predicates = [HasP9Vector] in {
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove] in {
+def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
+ "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
- // [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_XO5_VB5<opcode, xo2, xo, opc, pattern>, isRecordForm;
-
- // [PO VRT XO VRB XO /], but the VRB is only used the left 64 bits (or less),
- // So we use different operand class for VRB
- class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- RegisterOperand vbtype, list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
+ "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
+ []>, Requires<[In64BitMode]>;
- // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
- class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_XO5_VB5_VSFR<opcode, xo2, xo, opc, pattern>, isRecordForm;
+def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
+ "mfvsrld $rA, $XT", IIC_VecGeneral,
+ []>, Requires<[In64BitMode]>;
- // [PO T XO B XO BX /]
- class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
- list<dag> pattern>
- : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
- !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
-
- // [PO T XO B XO BX TX]
- class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX2_RD6_XO5_RS6<opcode, xo2, xo, (outs vtype:$XT), (ins vtype:$XB),
- !strconcat(opc, " $XT, $XB"), IIC_VecFP, pattern>;
-
- // [PO T A B XO AX BX TX], src and dest register use different operand class
- class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
- RegisterOperand xty, RegisterOperand aty, RegisterOperand bty,
- InstrItinClass itin, list<dag> pattern>
- : XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
- !strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
-
- // [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
-
- // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_VA5_VB5<opcode, xo, opc, pattern>, isRecordForm;
-
- // [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
-
- // [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
- class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : X_VT5_VA5_VB5_FMA<opcode, xo, opc, pattern>, isRecordForm;
+} // HasVSX, IsISA3_0, HasDirectMove
- //===--------------------------------------------------------------------===//
+let Predicates = [HasVSX, HasP9Vector] in {
// Quad-Precision Scalar Move Instructions:
-
// Copy Sign
def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
[(set f128:$vT,
@@ -2769,40 +1430,28 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(fneg (int_ppc_fmaf128_round_to_odd
f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
- // Additional fnmsub patterns: -a*b + c == -(a*b - c)
- def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
- def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
-
- //===--------------------------------------------------------------------===//
- // Quad/Double-Precision Compare Instructions:
-
- // [PO BF // VRA VRB XO /]
- class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
- list<dag> pattern>
- : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
- !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
- let Pattern = pattern;
- }
-
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
// QP Compare Ordered/Unordered
- def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
- def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
-
- // DP/QP Compare Exponents
- def XSCMPEXPDP : XX3Form_1<60, 59,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
- def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
-
- // DP Compare ==, >=, >, !=
- // Use vsrc for XT, because the entire register of XT is set.
- // XT.dword[1] = 0x0000_0000_0000_0000
- def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
- def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
- def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
- IIC_FPCompare, []>;
+ let hasSideEffects = 1 in {
+ def XSCMPOQP : X_BF3_VA5_VB5<63, 132, "xscmpoqp", []>;
+ def XSCMPUQP : X_BF3_VA5_VB5<63, 644, "xscmpuqp", []>;
+
+ // DP/QP Compare Exponents
+ def XSCMPEXPDP : XX3Form_1<60, 59,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
+
+ // DP Compare ==, >=, >, !=
+ // Use vsrc for XT, because the entire register of XT is set.
+ // XT.dword[1] = 0x0000_0000_0000_0000
+ def XSCMPEQDP : XX3_XT5_XA5_XB5<60, 3, "xscmpeqdp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ def XSCMPGEDP : XX3_XT5_XA5_XB5<60, 19, "xscmpgedp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ def XSCMPGTDP : XX3_XT5_XA5_XB5<60, 11, "xscmpgtdp", vsrc, vsfrc, vsfrc,
+ IIC_FPCompare, []>;
+ }
//===--------------------------------------------------------------------===//
// Quad-Precision Floating-Point Conversion Instructions:
@@ -2818,86 +1467,44 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(int_ppc_truncf128_round_to_odd
f128:$vB))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
// Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
- def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
- def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
- def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
- def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
+ let hasSideEffects = 1 in {
+ def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
+ def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
+ def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
+ def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
+ }
// Convert (Un)Signed DWord -> QP.
def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
- def : Pat<(f128 (sint_to_fp i64:$src)),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))),
- (f128 (XSCVSDQP $src))>;
- def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
-
def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
- def : Pat<(f128 (uint_to_fp i64:$src)),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP $src))>;
-
- // Convert (Un)Signed Word -> QP.
- def : Pat<(f128 (sint_to_fp i32:$src)),
- (f128 (XSCVSDQP (MTVSRWA $src)))>;
- def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
- (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
- def : Pat<(f128 (uint_to_fp i32:$src)),
- (f128 (XSCVUDQP (MTVSRWZ $src)))>;
- def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
- (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
-
- //===--------------------------------------------------------------------===//
- // Round to Floating-Point Integer Instructions
// (Round &) Convert DP <-> HP
// Note! xscvdphp's src and dest register both use the left 64 bits, so we use
// vsfrc for src and dest register. xscvhpdp's src only use the left 16 bits,
// but we still use vsfrc for it.
- def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
- def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSCVDPHP : XX2_XT6_XO5_XB6<60, 17, 347, "xscvdphp", vsfrc, []>;
+ def XSCVHPDP : XX2_XT6_XO5_XB6<60, 16, 347, "xscvhpdp", vsfrc, []>;
+ }
// Vector HP -> SP
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
[(set v4f32:$XT,
(int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
- // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
- // separate pattern so that it can convert the input register class from
- // VRRC(v8i16) to VSRC.
- def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
- (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
-
- class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
- list<dag> pattern>
- : Z23Form_8<opcode, xo,
- (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
- !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
- let RC = ex;
- }
-
// Round to Quad-Precision Integer [with Inexact]
def XSRQPI : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 0, "xsrqpi" , []>;
def XSRQPIX : Z23_VT5_R1_VB5_RMC2_EX1<63, 5, 1, "xsrqpix", []>;
- // Use current rounding mode
- def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
- // Round to nearest, ties away from zero
- def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
- // Round towards Zero
- def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
- // Round towards +Inf
- def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
- // Round towards -Inf
- def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
-
- // Use current rounding mode, [with Inexact]
- def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
-
// Round Quad-Precision to Double-Extended Precision (fp80)
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
def XSRQPXP : Z23_VT5_R1_VB5_RMC2_EX1<63, 37, 0, "xsrqpxp", []>;
//===--------------------------------------------------------------------===//
@@ -2905,26 +1512,25 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Insert Exponent DP/QP
// XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
- def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
- "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
- // vB NOTE: only vB.dword[0] is used, that's why we don't use
- // X_VT5_VA5_VB5 form
- def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
- "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
-
- def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)),
- (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
+ "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
+ // vB NOTE: only vB.dword[0] is used, that's why we don't use
+ // X_VT5_VA5_VB5 form
+ def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
+ "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ }
// Extract Exponent/Significand DP/QP
- def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
- def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
-
- def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
- def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
+ def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
- def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)),
- (i64 (MFVSRD (EXTRACT_SUBREG
- (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+ def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
+ def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
+ }
// Vector Insert Word
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
@@ -2937,6 +1543,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
// Vector Extract Unsigned Word
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
(outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
"xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
@@ -2961,26 +1569,19 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
[(set v4i32: $XT,
(int_ppc_vsx_xvxsigsp v4f32:$XB))]>;
- let AddedComplexity = 400, Predicates = [HasP9Vector] in {
- // Extra patterns expanding to vector Extract Word/Insert Word
- def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
- (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
- def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
- (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
- } // AddedComplexity = 400, HasP9Vector
-
- //===--------------------------------------------------------------------===//
-
// Test Data Class SP/DP/QP
- def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
- (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
- "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
- def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
- (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
- "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
- def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
- (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
- "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
+ "xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
+ def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
+ "xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
+ def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
+ (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
+ "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
+ }
// Vector Test Data Class SP/DP
def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
@@ -2994,52 +1595,52 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
[(set v2i64: $XT,
(int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
- //===--------------------------------------------------------------------===//
-
// Maximum/Minimum Type-C/Type-J DP
def XSMAXCDP : XX3_XT5_XA5_XB5<60, 128, "xsmaxcdp", vsfrc, vsfrc, vsfrc,
IIC_VecFP,
[(set f64:$XT, (PPCxsmaxc f64:$XA, f64:$XB))]>;
- def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
def XSMINCDP : XX3_XT5_XA5_XB5<60, 136, "xsmincdp", vsfrc, vsfrc, vsfrc,
IIC_VecFP,
[(set f64:$XT, (PPCxsminc f64:$XA, f64:$XB))]>;
- def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
- IIC_VecFP, []>;
- //===--------------------------------------------------------------------===//
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XSMAXJDP : XX3_XT5_XA5_XB5<60, 144, "xsmaxjdp", vsrc, vsfrc, vsfrc,
+ IIC_VecFP, []>;
+ def XSMINJDP : XX3_XT5_XA5_XB5<60, 152, "xsminjdp", vsrc, vsfrc, vsfrc,
+ IIC_VecFP, []>;
+ }
// Vector Byte-Reverse H/W/D/Q Word
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
def XXBRH : XX2_XT6_XO5_XB6<60, 7, 475, "xxbrh", vsrc, []>;
def XXBRW : XX2_XT6_XO5_XB6<60, 15, 475, "xxbrw", vsrc,
[(set v4i32:$XT, (bswap v4i32:$XB))]>;
def XXBRD : XX2_XT6_XO5_XB6<60, 23, 475, "xxbrd", vsrc,
[(set v2i64:$XT, (bswap v2i64:$XB))]>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
def XXBRQ : XX2_XT6_XO5_XB6<60, 31, 475, "xxbrq", vsrc, []>;
- // Vector Reverse
- def : Pat<(v8i16 (bswap v8i16 :$A)),
- (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
- def : Pat<(v1i128 (bswap v1i128 :$A)),
- (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
-
// Vector Permute
- def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
- IIC_VecPerm, []>;
- def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
- IIC_VecPerm, []>;
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in {
+ def XXPERM : XX3_XT5_XA5_XB5<60, 26, "xxperm" , vsrc, vsrc, vsrc,
+ IIC_VecPerm, []>;
+ def XXPERMR : XX3_XT5_XA5_XB5<60, 58, "xxpermr", vsrc, vsrc, vsrc,
+ IIC_VecPerm, []>;
+ }
// Vector Splat Immediate Byte
+ // FIXME: Setting the hasSideEffects flag here to match current behaviour.
+ let hasSideEffects = 1 in
def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
"xxspltib $XT, $IMM8", IIC_VecPerm, []>;
- //===--------------------------------------------------------------------===//
- // Vector/Scalar Load/Store Instructions
-
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
+ let mayLoad = 1, mayStore = 0 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
"lxv $XT, $src", IIC_LdStLFD, []>;
@@ -3050,13 +1651,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
"lxssp $vD, $src", IIC_LdStLFD, []>;
- // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
- // "out" and "in" dag
- class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
- !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
-
// Load as Integer Byte/Halfword & Zero Indexed
def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
[(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
@@ -3084,7 +1678,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
+ let mayStore = 1, mayLoad = 0 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
"stxv $XT, $dst", IIC_LdStSTFD, []>;
@@ -3095,12 +1689,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
"stxssp $vS, $dst", IIC_LdStSTFD, []>;
- // [PO S RA RB XO SX]
- class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
- RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
- !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
-
// Store as Integer Byte/Halfword Indexed
def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
[(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
@@ -3132,732 +1720,452 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
i64:$rB)]>;
} // mayStore
- let Predicates = [IsLittleEndian] in {
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
- }
+ def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load iaddrX4:$src))]>;
+ def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load iaddrX4:$src))]>;
+ def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, iaddrX4:$dst)]>;
+ def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, iaddrX4:$dst)]>;
+
+ let mayStore = 1 in {
+ def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
+ (ins spilltovsrrc:$XT, memrr:$dst),
+ "#SPILLTOVSR_STX", []>;
+ def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
+ "#SPILLTOVSR_ST", []>;
+ }
+ let mayLoad = 1 in {
+ def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
+ (ins memrr:$src),
+ "#SPILLTOVSR_LDX", []>;
+ def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
+ "#SPILLTOVSR_LD", []>;
+
+ }
+ } // HasP9Vector
+} // hasSideEffects = 0
+
+let PPC970_Single = 1, AddedComplexity = 400 in {
+
+ def SELECT_CC_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
+ (ins crrc:$cond, vsrc:$T, vsrc:$F, i32imm:$BROPC),
+ "#SELECT_CC_VSRC",
+ []>;
+ def SELECT_VSRC: PPCCustomInserterPseudo<(outs vsrc:$dst),
+ (ins crbitrc:$cond, vsrc:$T, vsrc:$F),
+ "#SELECT_VSRC",
+ [(set v2f64:$dst,
+ (select i1:$cond, v2f64:$T, v2f64:$F))]>;
+ def SELECT_CC_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
+ (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSFRC",
+ []>;
+ def SELECT_VSFRC: PPCCustomInserterPseudo<(outs f8rc:$dst),
+ (ins crbitrc:$cond, f8rc:$T, f8rc:$F),
+ "#SELECT_VSFRC",
+ [(set f64:$dst,
+ (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_CC_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+ (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VSSRC",
+ []>;
+ def SELECT_VSSRC: PPCCustomInserterPseudo<(outs f4rc:$dst),
+ (ins crbitrc:$cond, f4rc:$T, f4rc:$F),
+ "#SELECT_VSSRC",
+ [(set f32:$dst,
+ (select i1:$cond, f32:$T, f32:$F))]>;
+}
+}
+
+//----------------------------- DAG Definitions ------------------------------//
+def FpMinMax {
+ dag F32Min = (COPY_TO_REGCLASS (XSMINDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+ dag F32Max = (COPY_TO_REGCLASS (XSMAXDP (COPY_TO_REGCLASS $A, VSFRC),
+ (COPY_TO_REGCLASS $B, VSFRC)),
+ VSSRC);
+}
+
+def ScalarLoads {
+ dag Li8 = (i32 (extloadi8 xoaddr:$src));
+ dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
+ dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
+ dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
+ dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
+
+ dag Li16 = (i32 (extloadi16 xoaddr:$src));
+ dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
+ dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
+ dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
+ dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
+
+ dag Li32 = (i32 (load xoaddr:$src));
+}
+
+def DWToSPExtractConv {
+ dag El0US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1US1 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1US2 = (f32 (PPCfcfidus
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag El0SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 0))))));
+ dag El1SS1 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S1, 1))))));
+ dag El0SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 0))))));
+ dag El1SS2 = (f32 (PPCfcfids
+ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S2, 1))))));
+ dag BVU = (v4f32 (build_vector El0US1, El1US1, El0US2, El1US2));
+ dag BVS = (v4f32 (build_vector El0SS1, El1SS1, El0SS2, El1SS2));
+}
+
+def WToDPExtractConv {
+ dag El0S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 0))));
+ dag El1S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 1))));
+ dag El2S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 2))));
+ dag El3S = (f64 (PPCfcfid (PPCmtvsra (extractelt v4i32:$A, 3))));
+ dag El0U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 0))));
+ dag El1U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 1))));
+ dag El2U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 2))));
+ dag El3U = (f64 (PPCfcfidu (PPCmtvsrz (extractelt v4i32:$A, 3))));
+ dag BV02S = (v2f64 (build_vector El0S, El2S));
+ dag BV13S = (v2f64 (build_vector El1S, El3S));
+ dag BV02U = (v2f64 (build_vector El0U, El2U));
+ dag BV13U = (v2f64 (build_vector El1U, El3U));
+}
+
+/* Direct moves of various widths from GPR's into VSR's. Each move lines
+ the value up into element 0 (both BE and LE). Namely, entities smaller than
+ a doubleword are shifted left and moved for BE. For LE, they're moved, then
+ swapped to go into the least significant element of the VSR.
+*/
+def MovesToVSR {
+ dag BE_BYTE_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+ dag BE_HALF_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+ dag BE_WORD_0 =
+ (MTVSRD
+ (RLDICR
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+ dag BE_DWORD_0 = (MTVSRD $A);
+
+ dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
+ dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ LE_MTVSRW, sub_64));
+ dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2);
+ dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ BE_DWORD_0, sub_64));
+ dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2);
+}
+
+/* Patterns for extracting elements out of vectors. Integer elements are
+ extracted using direct move operations. Patterns for extracting elements
+ whose indices are not available at compile time are also provided with
+ various _VARIABLE_ patterns.
+ The numbering for the DAG's is for LE, but when used on BE, the correct
+ LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13).
+*/
+def VectorExtractions {
+ // Doubleword extraction
+ dag LE_DWORD_0 =
+ (MFVSRD
+ (EXTRACT_SUBREG
+ (XXPERMDI (COPY_TO_REGCLASS $S, VSRC),
+ (COPY_TO_REGCLASS $S, VSRC), 2), sub_64));
+ dag LE_DWORD_1 = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+
+ // Word extraction
+ dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64));
+ dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64));
+ dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64));
+ dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64));
+
+ // Halfword extraction
+ dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32));
+ dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32));
+ dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32));
+ dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32));
+ dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32));
+ dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32));
+ dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32));
+ dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32));
+
+ // Byte extraction
+ dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32));
+ dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32));
+ dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32));
+ dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32));
+ dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32));
+ dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32));
+ dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32));
+ dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32));
+ dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32));
+ dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32));
+ dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32));
+ dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32));
+ dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32));
+ dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32));
+ dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32));
+ dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32));
+
+ /* Variable element number (BE and LE patterns must be specified separately)
+ This is a rather involved process.
- let Predicates = [IsBigEndian] in {
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
- def: Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
- def: Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
- }
+ Conceptually, this is how the move is accomplished:
+ 1. Identify which doubleword contains the element
+ 2. Shift in the VMX register so that the correct doubleword is correctly
+ lined up for the MFVSRD
+ 3. Perform the move so that the element (along with some extra stuff)
+ is in the GPR
+ 4. Right shift within the GPR so that the element is right-justified
- // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
- // of f64
- def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
- (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
- def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
- (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
-
- // Patterns for which instructions from ISA 3.0 are a better match
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
-
- def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
- def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
- (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
-
- def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
- def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
- (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
- } // IsLittleEndian, HasP9Vector
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
- def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
- (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
- def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
- (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
- def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
- (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
- } // IsBigEndian, HasP9Vector
-
- // D-Form Load/Store
- def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)),
- (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
-
- def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst),
- (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
- def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
- (STXV $rS, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
- (STXV $rS, memrix16:$dst)>;
-
-
- def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
- def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
- (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
- def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
- (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
- def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVX $rS, xoaddr:$dst)>;
+ Of course, the index is an element number which has a different meaning
+ on LE/BE so the patterns have to be specified separately.
- let AddedComplexity = 400 in {
- // LIWAX - This instruction is used for sign extending i32 -> i64.
- // LIWZX - This instruction will be emitted for i32, f32, and when
- // zero-extending i32 to i64 (zext i32 -> i64).
- let Predicates = [IsLittleEndian] in {
+ Note: The final result will be the element right-justified with high
+ order bits being arbitrarily defined (namely, whatever was in the
+ vector register to the left of the value originally).
+ */
- def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2))>;
+ /* LE variable byte
+ Number 1. above:
+ - For elements 0-7, we shift left by 8 bytes since they're on the right
+ - For elements 8-15, we need not shift (shift left by zero bytes)
+ This is accomplished by inverting the bits of the index and AND-ing
+ with 0x8 (i.e. clearing all bits of the index and inverting bit 60).
+ */
+ dag LE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)));
- def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, LE_VBYTE_PERM_VEC));
- def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
- (v4i32 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
- def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
- (v4f32 (XXPERMDIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
- }
+ /* Number 4. above:
+ - Truncate the element number to the range 0-7 (8-15 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 8 as we need to shift right by the number of bits, not bytes
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT),
+ sub_32);
- let Predicates = [IsBigEndian] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
+ /* LE variable halfword
+ Number 1. above:
+ - For elements 0-3, we shift left by 8 since they're on the right
+ - For elements 4-7, we need not shift (shift left by zero bytes)
+ Similarly to the byte pattern, we invert the bits of the index, but we
+ AND with 0x4 (i.e. clear all bits of the index and invert bit 61).
+ Of course, the shift is still by 8 bytes, so we must multiply by 2.
+ */
+ dag LE_VHALF_PERM_VEC =
+ (v16i8 (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)));
- def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
- (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, LE_VHALF_PERM_VEC));
- def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
- (v4i32 (XXSLDWIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)),
+ sub_64));
- def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
- (v4f32 (XXSLDWIs
- (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
- }
+ /* Number 4. above:
+ - Truncate the element number to the range 0-3 (4-7 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 16 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT),
+ sub_32);
- }
+ /* LE variable word
+ Number 1. above:
+ - For elements 0-1, we shift left by 8 since they're on the right
+ - For elements 2-3, we need not shift
+ */
+ dag LE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
+ (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)));
- // Build vectors from i8 loads
- def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
- (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
- def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
- (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
- (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
- (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
- (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
- (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
-
- // Build vectors from i16 loads
- def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
- (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
- (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
- (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
- def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
- (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
- def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
- (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
-
- // Load/convert and convert/store patterns for f16.
- def : Pat<(f64 (extloadf16 xoaddr:$src)),
- (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
- def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
- (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
- def : Pat<(f32 (extloadf16 xoaddr:$src)),
- (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
- def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
- (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- // Scalar stores of i8
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
-
- // Scalar stores of i16
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- } // IsBigEndian, HasP9Vector
-
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- // Scalar stores of i8
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
- (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
-
- // Scalar stores of i16
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
- def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
- (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
- } // IsLittleEndian, HasP9Vector
-
-
- // Vector sign extensions
- def : Pat<(f64 (PPCVexts f64:$A, 1)),
- (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
- def : Pat<(f64 (PPCVexts f64:$A, 2)),
- (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VWORD_PERM_VEC));
- def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
- "#DFLOADf32",
- [(set f32:$XT, (load iaddrX4:$src))]>;
- def DFLOADf64 : PPCPostRAExpPseudo<(outs vsfrc:$XT), (ins memrix:$src),
- "#DFLOADf64",
- [(set f64:$XT, (load iaddrX4:$src))]>;
- def DFSTOREf32 : PPCPostRAExpPseudo<(outs), (ins vssrc:$XT, memrix:$dst),
- "#DFSTOREf32",
- [(store f32:$XT, iaddrX4:$dst)]>;
- def DFSTOREf64 : PPCPostRAExpPseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
- "#DFSTOREf64",
- [(store f64:$XT, iaddrX4:$dst)]>;
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ dag LE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)),
+ sub_64));
- def : Pat<(f64 (extloadf32 iaddrX4:$src)),
- (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
- def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))),
- (f32 (DFLOADf32 iaddrX4:$src))>;
-
- def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)),
- (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
- def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
-
- let AddedComplexity = 400 in {
- // The following pseudoinstructions are used to ensure the utilization
- // of all 64 VSX registers.
- let Predicates = [IsLittleEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
- (v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
- (v2f64 (XXPERMDIs
- (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- iaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- } // IsLittleEndian, HasP9Vector
-
- let Predicates = [IsBigEndian, HasP9Vector] in {
- def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
- (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
- def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
- (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
-
- def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
- (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
- def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
- (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
- sub_64), iaddrX4:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
- (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
- } // IsBigEndian, HasP9Vector
- }
+ /* Number 4. above:
+ - Truncate the element number to the range 0-1 (2-3 are symmetrical
+ and out of range values are truncated accordingly)
+ - Multiply by 32 as we need to shift right by the number of bits
+ - Shift right in the GPR by the calculated value
+ */
+ dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT),
+ sub_32);
- let Predicates = [IsBigEndian, HasP9Vector] in {
-
- // (Un)Signed DWord vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
-
- // (Un)Signed Word vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
- foreach Idx = [0,2,3] in {
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
- }
- foreach Idx = 0-3 in {
- def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
- (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
- }
+ /* LE variable doubleword
+ Number 1. above:
+ - For element 0, we shift left by 8 since it's on the right
+ - For element 1, we need not shift
+ */
+ dag LE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
+ (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)));
- // (Un)Signed HWord vector extract -> QP
- foreach Idx = 0-7 in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v8i16:$src, Idx), i16)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
- sub_64)))>;
- // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
- (f128 (XSCVUDQP (EXTRACT_SUBREG
- (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
- }
+ // Number 2. above:
+ // - Now that we set up the shift amount, we shift in the VMX register
+ dag LE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, LE_VDWORD_PERM_VEC));
- // (Un)Signed Byte vector extract -> QP
- foreach Idx = 0-15 in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
- i8)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
- }
+ // Number 3. above:
+ // - The doubleword containing our element is moved to a GPR
+ // - Number 4. is not needed for the doubleword as the value is 64-bits
+ dag LE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
- // Unsigned int in vsx register -> QP
- def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP
- (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
- } // IsBigEndian, HasP9Vector
-
- let Predicates = [IsLittleEndian, HasP9Vector] in {
-
- // (Un)Signed DWord vector extract -> QP
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
- def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
- (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
-
- // (Un)Signed Word vector extract -> QP
- foreach Idx = [[0,3],[1,2],[3,0]] in {
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG
- (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
- sub_64)))>;
- }
- def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
- (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
+ /* LE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+ - Convert 32-bit float to a 64-bit single precision float
+ */
+ dag LE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8,
+ (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)));
+ dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC);
+ dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE);
- foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
- def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
- (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
- }
+ /* LE variable double
+ Same as the LE doubleword except there is no move.
+ */
+ dag LE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
+ (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
+ LE_VDWORD_PERM_VEC));
+ dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC);
- // (Un)Signed HWord vector extract -> QP
- // The Nested foreach lists identifies the vector element and corresponding
- // register byte location.
- foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v8i16:$src, !head(Idx)), i16)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG (VEXTSH2D
- (VEXTRACTUH !head(!tail(Idx)), $src)),
- sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v8i16:$src, !head(Idx))),
- 65535))),
- (f128 (XSCVUDQP (EXTRACT_SUBREG
- (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
- }
+ /* BE variable byte
+ The algorithm here is the same as the LE variable byte except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x8
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-7
+ */
+ dag BE_VBYTE_PERM_VEC = (v16i8 (LVSL ZERO8, (ANDI8_rec $Idx, 8)));
+ dag BE_VBYTE_PERMUTE = (v16i8 (VPERM $S, $S, BE_VBYTE_PERM_VEC));
+ dag BE_MV_VBYTE = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60),
+ sub_32);
+ dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT),
+ sub_32);
- // (Un)Signed Byte vector extract -> QP
- foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
- [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
- def : Pat<(f128 (sint_to_fp
- (i32 (sext_inreg
- (vector_extract v16i8:$src, !head(Idx)), i8)))),
- (f128 (XSCVSDQP
- (EXTRACT_SUBREG
- (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
- sub_64)))>;
- def : Pat<(f128 (uint_to_fp
- (and (i32 (vector_extract v16i8:$src, !head(Idx))),
- 255))),
- (f128 (XSCVUDQP
- (EXTRACT_SUBREG
- (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
- }
+ /* BE variable halfword
+ The algorithm here is the same as the LE variable halfword except:
+ - The shift in the VMX register is by 0/8 for opposite element numbers so
+ we simply AND the element number with 0x4 and multiply by 2
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-3
+ */
+ dag BE_VHALF_PERM_VEC = (v16i8 (LVSL ZERO8,
+ (RLDICR (ANDI8_rec $Idx, 4), 1, 62)));
+ dag BE_VHALF_PERMUTE = (v16i8 (VPERM $S, $S, BE_VHALF_PERM_VEC));
+ dag BE_MV_VHALF = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59),
+ sub_32);
+ dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT),
+ sub_32);
- // Unsigned int in vsx register -> QP
- def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
- (f128 (XSCVUDQP
- (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
- } // IsLittleEndian, HasP9Vector
-
- // Convert (Un)Signed DWord in memory -> QP
- def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
- (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
- def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
- (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
- (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
- def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
- (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
-
- // Convert Unsigned HWord in memory -> QP
- def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
- (f128 (XSCVUDQP (LXSIHZX xaddr:$src)))>;
-
- // Convert Unsigned Byte in memory -> QP
- def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
- (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
-
- // Truncate & Convert QP -> (Un)Signed (D)Word.
- def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
- def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
- def : Pat<(i32 (fp_to_sint f128:$src)),
- (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
- def : Pat<(i32 (fp_to_uint f128:$src)),
- (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
-
- // Instructions for store(fptosi).
- // The 8-byte version is repeated here due to availability of D-Form STXSD.
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
- (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
- (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1),
- (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
-
- // Instructions for store(fptoui).
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
- (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
- (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
- (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
-
- // Round & Convert QP -> DP/SP
- def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
- def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
-
- // Convert SP -> QP
- def : Pat<(f128 (fpextend f32:$src)),
- (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
-
- def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)),
- (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
- (COPY_TO_REGCLASS $XB, VSSRC)),
- VSSRC))>;
- def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)),
- (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
- (COPY_TO_REGCLASS $XB, VSSRC)),
- VSSRC))>;
-
-} // end HasP9Vector, AddedComplexity
+ /* BE variable word
+ The algorithm is the same as the LE variable word except:
+ - The shift in the VMX register happens for opposite element numbers
+ - The order of elements after the move to GPR is reversed, so we invert
+ the bits of the index prior to truncating to the range 0-1
+ */
+ dag BE_VWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
+ (RLDICR (ANDI8_rec $Idx, 2), 2, 61)));
+ dag BE_VWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VWORD_PERM_VEC));
+ dag BE_MV_VWORD = (MFVSRD
+ (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)),
+ sub_64));
+ dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58),
+ sub_32);
+ dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT),
+ sub_32);
-let AddedComplexity = 400 in {
- let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsBigEndian] in {
- def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
- (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
- }
- let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in {
- def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
- (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
- }
-}
+ /* BE variable doubleword
+ Same as the LE doubleword except we shift in the VMX register for opposite
+ element indices.
+ */
+ dag BE_VDWORD_PERM_VEC = (v16i8 (LVSL ZERO8,
+ (RLDICR (ANDI8_rec $Idx, 1), 3, 60)));
+ dag BE_VDWORD_PERMUTE = (v16i8 (VPERM $S, $S, BE_VDWORD_PERM_VEC));
+ dag BE_VARIABLE_DWORD =
+ (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)),
+ sub_64));
-let Predicates = [HasP9Vector], hasSideEffects = 0 in {
- let mayStore = 1 in {
- def SPILLTOVSR_STX : PseudoXFormMemOp<(outs),
- (ins spilltovsrrc:$XT, memrr:$dst),
- "#SPILLTOVSR_STX", []>;
- def SPILLTOVSR_ST : PPCPostRAExpPseudo<(outs), (ins spilltovsrrc:$XT, memrix:$dst),
- "#SPILLTOVSR_ST", []>;
- }
- let mayLoad = 1 in {
- def SPILLTOVSR_LDX : PseudoXFormMemOp<(outs spilltovsrrc:$XT),
- (ins memrr:$src),
- "#SPILLTOVSR_LDX", []>;
- def SPILLTOVSR_LD : PPCPostRAExpPseudo<(outs spilltovsrrc:$XT), (ins memrix:$src),
- "#SPILLTOVSR_LD", []>;
+ /* BE variable float
+ - Shift the vector to line up the desired element to BE Word 0
+ - Convert 32-bit float to a 64-bit single precision float
+ */
+ dag BE_VFLOAT_PERM_VEC = (v16i8 (LVSL ZERO8, (RLDICR $Idx, 2, 61)));
+ dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC);
+ dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE);
+
+ /* BE variable double
+ Same as the BE doubleword except there is no move.
+ */
+ dag BE_VDOUBLE_PERMUTE = (v16i8 (VPERM (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
+ (v16i8 (COPY_TO_REGCLASS $S, VRRC)),
+ BE_VDWORD_PERM_VEC));
+ dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
+}
- }
+def AlignValues {
+ dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
+ dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
}
+
// Integer extend helper dags 32 -> 64
def AnyExts {
dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
@@ -4004,511 +2312,2234 @@ def LoadFP {
dag D = (f32 (load xoaddr:$D));
}
-// FP merge dags (for f32 -> v4f32)
-def MrgFP {
- dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC);
- dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC);
- dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC);
- dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC);
- dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
- (COPY_TO_REGCLASS $C, VSRC), 0));
- dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
- (COPY_TO_REGCLASS $D, VSRC), 0));
- dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
- dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
- dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
- dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
-}
+// FP merge dags (for f32 -> v4f32)
+def MrgFP {
+ dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC);
+ dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC);
+ dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC);
+ dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC);
+ dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC), 0));
+ dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $D, VSRC), 0));
+ dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
+ dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
+ dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
+ dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
+}
+
+// Word-element merge dags - conversions from f64 to i32 merged into vectors.
+def MrgWords {
+ // For big endian, we merge low and hi doublewords (A, B).
+ dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
+ dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
+ dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
+ dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
+ dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
+ dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));
+
+ // For little endian, we merge low and hi doublewords (B, A).
+ dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
+ dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
+ dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
+ dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
+ dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
+ dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));
+
+ // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
+ // then merge.
+ dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
+ (COPY_TO_REGCLASS f64:$C, VSRC), 0));
+ dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
+ (COPY_TO_REGCLASS f64:$D, VSRC), 0));
+ dag CVACS = (v4i32 (XVCVDPSXWS AC));
+ dag CVBDS = (v4i32 (XVCVDPSXWS BD));
+ dag CVACU = (v4i32 (XVCVDPUXWS AC));
+ dag CVBDU = (v4i32 (XVCVDPUXWS BD));
+
+ // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
+ // then merge.
+ dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
+ (COPY_TO_REGCLASS f64:$B, VSRC), 0));
+ dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
+ (COPY_TO_REGCLASS f64:$A, VSRC), 0));
+ dag CVDBS = (v4i32 (XVCVDPSXWS DB));
+ dag CVCAS = (v4i32 (XVCVDPSXWS CA));
+ dag CVDBU = (v4i32 (XVCVDPUXWS DB));
+ dag CVCAU = (v4i32 (XVCVDPUXWS CA));
+}
+
+//---------------------------- Anonymous Patterns ----------------------------//
+// Predicate combinations are kept in roughly chronological order in terms of
+// instruction availability in the architecture. For example, VSX came in with
+// ISA 2.06 (Power7). There have since been additions in ISA 2.07 (Power8) and
+// ISA 3.0 (Power9). However, the granularity of features on later subtargets
+// is finer for various reasons. For example, we have Power8Vector,
+// Power8Altivec, DirectMove that all came in with ISA 2.07. The situation is
+// similar with ISA 3.0 with Power9Vector, Power9Altivec, IsISA3_0. Then there
+// are orthogonal predicates such as endianness for which the order was
+// arbitrarily chosen to be Big, Little.
+//
+// Predicate combinations available:
+// [HasVSX]
+// [HasVSX, IsBigEndian]
+// [HasVSX, IsLittleEndian]
+// [HasVSX, NoP9Vector]
+// [HasVSX, HasOnlySwappingMemOps]
+// [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
+// [HasVSX, HasP8Vector]
+// [HasVSX, HasP8Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, IsLittleEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasDirectMove]
+// [HasVSX, HasDirectMove, IsBigEndian]
+// [HasVSX, HasDirectMove, IsLittleEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian]
+// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasP9Vector]
+// [HasVSX, HasP9Vector, IsBigEndian]
+// [HasVSX, HasP9Vector, IsLittleEndian]
+// [HasVSX, HasP9Altivec]
+// [HasVSX, HasP9Altivec, IsBigEndian]
+// [HasVSX, HasP9Altivec, IsLittleEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
+
+let AddedComplexity = 400 in {
+// Valid for any VSX subtarget, regardless of endianness.
+let Predicates = [HasVSX] in {
+def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
+ (v4i32 (XXLNOR $A, $A))>;
+def : Pat<(v4i32 (or (and (vnot_ppc v4i32:$C), v4i32:$A),
+ (and v4i32:$B, v4i32:$C))),
+ (v4i32 (XXSEL $A, $B, $C))>;
+
+// Additional fnmsub patterns: -a*b + c == -(a*b - c)
+def : Pat<(fma (fneg f64:$A), f64:$B, f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$B), f64:$C),
+ (XSNMSUBADP $C, $A, $B)>;
+
+def : Pat<(fma (fneg v2f64:$A), v2f64:$B, v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$B), v2f64:$C),
+ (XVNMSUBADP $C, $A, $B)>;
+
+def : Pat<(fma (fneg v4f32:$A), v4f32:$B, v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$B), v4f32:$C),
+ (XVNMSUBASP $C, $A, $B)>;
+
+def : Pat<(v2f64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2i64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2i64 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (bitconvert v1i128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v1i128 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2i64 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert f128:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVSXWDP (v2i64 (XXMRGLW $C, $C))))>;
+
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 0)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGHW $C, $C))))>;
+def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
+ (v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
+
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 0)), (XVCVSPDP (XXMRGHW $C, $C))>;
+def : Pat<(v2f64 (PPCfpexth v4f32:$C, 1)), (XVCVSPDP (XXMRGLW $C, $C))>;
+
+// Permutes.
+def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2i64 (PPCxxswapd v2i64:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
+def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
+
+// PPCvecshl XT, XA, XA, 2 can be selected to either XXSLDWI XT,XA,XA,2 or
+// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2); the latter is more profitable.
+def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)),
+ (XXPERMDI $src, $src, 2)>;
+
+// Selects.
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETULE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETEQ)),
+ (SELECT_VSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGE)),
+ (SELECT_VSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGE)),
+ (SELECT_VSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETGT)),
+ (SELECT_VSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETUGT)),
+ (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETNE)),
+ (SELECT_VSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETULE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_VSFRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_VSFRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGE)),
+ (SELECT_VSFRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_VSFRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETUGT)),
+ (SELECT_VSFRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_VSFRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Divides.
+def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B),
+ (XVDIVSP $A, $B)>;
+def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
+ (XVDIVDP $A, $B)>;
+
+// Reciprocal estimate
+def : Pat<(int_ppc_vsx_xvresp v4f32:$A),
+ (XVRESP $A)>;
+def : Pat<(int_ppc_vsx_xvredp v2f64:$A),
+ (XVREDP $A)>;
+
+// Recip. square root estimate
+def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A),
+ (XVRSQRTESP $A)>;
+def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A),
+ (XVRSQRTEDP $A)>;
+
+// Vector selection
+def : Pat<(v16i8 (vselect v16i8:$vA, v16i8:$vB, v16i8:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(v8i16 (vselect v8i16:$vA, v8i16:$vB, v8i16:$vC)),
+ (COPY_TO_REGCLASS
+ (XXSEL (COPY_TO_REGCLASS $vC, VSRC),
+ (COPY_TO_REGCLASS $vB, VSRC),
+ (COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(vselect v4i32:$vA, v4i32:$vB, v4i32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2i64:$vB, v2i64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v4i32:$vA, v4f32:$vB, v4f32:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+def : Pat<(vselect v2i64:$vA, v2f64:$vB, v2f64:$vC),
+ (XXSEL $vC, $vB, $vA)>;
+
+def : Pat<(v4f32 (fmaxnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMAXSP $src1, $src2))>;
+def : Pat<(v4f32 (fminnum v4f32:$src1, v4f32:$src2)),
+ (v4f32 (XVMINSP $src1, $src2))>;
+def : Pat<(v2f64 (fmaxnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMAXDP $src1, $src2))>;
+def : Pat<(v2f64 (fminnum v2f64:$src1, v2f64:$src2)),
+ (v2f64 (XVMINDP $src1, $src2))>;
+
+// f32 Min.
+def : Pat<(f32 (fminnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+def : Pat<(f32 (fminnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Min)>;
+// f32 Max.
+def : Pat<(f32 (fmaxnum_ieee f32:$A, f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), f32:$B)),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee f32:$A, (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+def : Pat<(f32 (fmaxnum_ieee (fcanonicalize f32:$A), (fcanonicalize f32:$B))),
+ (f32 FpMinMax.F32Max)>;
+
+// f64 Min.
+def : Pat<(f64 (fminnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+def : Pat<(f64 (fminnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMINDP $A, $B))>;
+// f64 Max.
+def : Pat<(f64 (fmaxnum_ieee f64:$A, f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), f64:$B)),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee f64:$A, (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+def : Pat<(f64 (fmaxnum_ieee (fcanonicalize f64:$A), (fcanonicalize f64:$B))),
+ (f64 (XSMAXDP $A, $B))>;
+
+def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+// Rounding for single precision.
+def : Pat<(f32 (fround f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPI
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (fnearbyint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (ffloor f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIM
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (fceil f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIP
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (ftrunc f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIZ
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f32 (frint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
-// Word-element merge dags - conversions from f64 to i32 merged into vectors.
-def MrgWords {
- // For big endian, we merge low and hi doublewords (A, B).
- dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
- dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
- dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
- dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
- dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
- dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));
+// Rounding for double precision.
+def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
- // For little endian, we merge low and hi doublewords (B, A).
- dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
- dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
- dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
- dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
- dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
- dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));
+// Materialize a zero-vector of long long
+def : Pat<(v2i64 immAllZerosV),
+ (v2i64 (XXLXORz))>;
- // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
- // then merge.
- dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
- (COPY_TO_REGCLASS f64:$C, VSRC), 0));
- dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
- (COPY_TO_REGCLASS f64:$D, VSRC), 0));
- dag CVACS = (v4i32 (XVCVDPSXWS AC));
- dag CVBDS = (v4i32 (XVCVDPSXWS BD));
- dag CVACU = (v4i32 (XVCVDPUXWS AC));
- dag CVBDU = (v4i32 (XVCVDPUXWS BD));
+// Build vectors of floating point converted to i32.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
+ DblToInt.A, DblToInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
+ DblToUInt.A, DblToUInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
+def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
+def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
+def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
+def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
+def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
+ (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
+ (v2f64 (LXVDSX xoaddr:$A))>;
+def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
+ (v2i64 (LXVDSX xoaddr:$A))>;
+
+// Build vectors of floating point converted to i64.
+def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
+def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
+def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
+ (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
+def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
+ (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
+} // HasVSX
- // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
- // then merge.
- dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
- (COPY_TO_REGCLASS f64:$B, VSRC), 0));
- dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
- (COPY_TO_REGCLASS f64:$A, VSRC), 0));
- dag CVDBS = (v4i32 (XVCVDPSXWS DB));
- dag CVCAS = (v4i32 (XVCVDPSXWS CA));
- dag CVDBU = (v4i32 (XVCVDPUXWS DB));
- dag CVCAU = (v4i32 (XVCVDPUXWS CA));
-}
+// Any big endian VSX subtarget.
+let Predicates = [HasVSX, IsBigEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
-// Patterns for BUILD_VECTOR nodes.
-let AddedComplexity = 400 in {
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS $S, VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
- let Predicates = [HasVSX] in {
- // Build vectors of floating point converted to i32.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
- DblToInt.A, DblToInt.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
- DblToUInt.A, DblToUInt.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
- def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
- (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
- (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
- def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
- (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
- (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
- def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
- (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
- def : Pat<(v2f64 (PPCldsplat xoaddr:$A)),
- (v2f64 (LXVDSX xoaddr:$A))>;
- def : Pat<(v2i64 (PPCldsplat xoaddr:$A)),
- (v2i64 (LXVDSX xoaddr:$A))>;
-
- // Build vectors of floating point converted to i64.
- def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
- def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
- (v2i64 (XXPERMDIs
- (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
- def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
- (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
- def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
- (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
- }
+def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>;
+
+def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC), 0))>;
+// Using VMRGEW to assemble the final vector would be a lower latency
+// solution. However, we choose to go with the slightly higher latency
+// XXPERMDI for 2 reasons:
+// 1. This is likely to occur in unrolled loops where regpressure is high,
+// so we want to use the latter as it has access to all 64 VSX registers.
+// 2. Using Altivec instructions in this sequence would likely cause the
+// allocation of Altivec registers even for the loads which in turn would
+// force the use of LXSIWZX for the loads, adding a cycle of latency to
+// each of the loads which would otherwise be able to use LFIWZX.
+def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
+ (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
+def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
+
+// Convert 4 doubles to a vector of ints.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP $A))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
+ (XXPERMDI $A, $B, 3), 1)))>;
+def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP $A))>;
+def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP $A))>;
+def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
+} // HasVSX, IsBigEndian
+
+// Any little endian VSX subtarget.
+let Predicates = [HasVSX, IsLittleEndian] in {
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64),
+ (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>;
- let Predicates = [HasVSX, NoP9Vector] in {
- // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
- (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
- (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
- }
+def : Pat<(f64 (extractelt v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+def : Pat<(f64 (extractelt v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
- let Predicates = [IsBigEndian, HasP8Vector] in {
- def : Pat<DWToSPExtractConv.BVU,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
- (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
- def : Pat<DWToSPExtractConv.BVS,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
- (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
- def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
-
- // Elements in a register on a BE system are in order <0, 1, 2, 3>.
- // The store instructions store the second word from the left.
- // So to align element zero, we need to modulo-left-shift by 3 words.
- // Similar logic applies for elements 2 and 3.
- foreach Idx = [ [0,3], [2,1], [3,2] ] in {
- def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- }
- }
+def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfid (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVSXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
+def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
+ (f64 (XSCVUXDDP (COPY_TO_REGCLASS (f64 (COPY_TO_REGCLASS $S, VSRC)), VSFRC)))>;
- let Predicates = [HasP8Vector, IsBigEndian, NoP9Vector] in {
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- }
-
- // Big endian, available on all targets with VSX
- let Predicates = [IsBigEndian, HasVSX] in {
- def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
- (v2f64 (XXPERMDI
- (COPY_TO_REGCLASS $A, VSRC),
- (COPY_TO_REGCLASS $B, VSRC), 0))>;
- // Using VMRGEW to assemble the final vector would be a lower latency
- // solution. However, we choose to go with the slightly higher latency
- // XXPERMDI for 2 reasons:
- // 1. This is likely to occur in unrolled loops where regpressure is high,
- // so we want to use the latter as it has access to all 64 VSX registers.
- // 2. Using Altivec instructions in this sequence would likely cause the
- // allocation of Altivec registers even for the loads which in turn would
- // force the use of LXSIWZX for the loads, adding a cycle of latency to
- // each of the loads which would otherwise be able to use LFIWZX.
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
- (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32A, MrgFP.LD32B),
- (XXMRGHW MrgFP.LD32C, MrgFP.LD32D), 3))>;
- def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
- (VMRGEW MrgFP.AC, MrgFP.BD)>;
- def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
- DblToFlt.B0, DblToFlt.B1)),
- (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
-
- // Convert 4 doubles to a vector of ints.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
- DblToInt.C, DblToInt.D)),
- (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
- DblToUInt.C, DblToUInt.D)),
- (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
- ExtDbl.B0S, ExtDbl.B1S)),
- (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
- ExtDbl.B0U, ExtDbl.B1U)),
- (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
- (f64 (fpextend (extractelt v4f32:$A, 1))))),
- (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
- (f64 (fpextend (extractelt v4f32:$A, 0))))),
- (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
- (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
- (f64 (fpextend (extractelt v4f32:$A, 2))))),
- (v2f64 (XVCVSPDP $A))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
- (f64 (fpextend (extractelt v4f32:$A, 3))))),
- (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 3)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
- (f64 (fpextend (extractelt v4f32:$A, 3))))),
- (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
- (f64 (fpextend (extractelt v4f32:$A, 2))))),
- (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
- (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
- (f64 (fpextend (extractelt v4f32:$B, 0))))),
- (v2f64 (XVCVSPDP (XXPERMDI $A, $B, 0)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
- (f64 (fpextend (extractelt v4f32:$B, 3))))),
- (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $A, $B, 3),
- (XXPERMDI $A, $B, 3), 1)))>;
- def : Pat<WToDPExtractConv.BV02S,
- (v2f64 (XVCVSXWDP $A))>;
- def : Pat<WToDPExtractConv.BV13S,
- (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 3)))>;
- def : Pat<WToDPExtractConv.BV02U,
- (v2f64 (XVCVUXWDP $A))>;
- def : Pat<WToDPExtractConv.BV13U,
- (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
- }
+def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
+ (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+
+// Little endian, available on all targets with VSX
+def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $A, VSRC), 0))>;
+// Using VMRGEW to assemble the final vector would be a lower latency
+// solution. However, we choose to go with the slightly higher latency
+// XXPERMDI for 2 reasons:
+// 1. This is likely to occur in unrolled loops where regpressure is high,
+// so we want to use the latter as it has access to all 64 VSX registers.
+// 2. Using Altivec instructions in this sequence would likely cause the
+// allocation of Altivec registers even for the loads which in turn would
+// force the use of LXSIWZX for the loads, adding a cycle of latency to
+// each of the loads which would otherwise be able to use LFIWZX.
+def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
+ (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
+ (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
+def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
+
+// Convert 4 doubles to a vector of ints.
+def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
+ DblToInt.C, DblToInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
+def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
+ DblToUInt.C, DblToUInt.D)),
+ (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
+ ExtDbl.B0S, ExtDbl.B1S)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
+def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
+ ExtDbl.B0U, ExtDbl.B1U)),
+ (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 1))))),
+ (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 0))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
+ (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP $A))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
+ (f64 (fpextend (extractelt v4f32:$A, 3))))),
+ (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$A, 2))))),
+ (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
+ (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
+ (f64 (fpextend (extractelt v4f32:$B, 0))))),
+ (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
+ (XXPERMDI $B, $A, 3), 1)))>;
+def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
+ (f64 (fpextend (extractelt v4f32:$B, 3))))),
+ (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
+def : Pat<WToDPExtractConv.BV02S,
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<WToDPExtractConv.BV13S,
+ (v2f64 (XVCVSXWDP $A))>;
+def : Pat<WToDPExtractConv.BV02U,
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
+def : Pat<WToDPExtractConv.BV13U,
+ (v2f64 (XVCVUXWDP $A))>;
+} // HasVSX, IsLittleEndian
+
+// Any pre-Power9 VSX subtarget.
+let Predicates = [HasVSX, NoP9Vector] in {
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xoaddr:$dst)>;
+
+// Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
+def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
+def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>;
+def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+ (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
+ (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
+def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+ (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
+ (XFLOADf32 xoaddr:$A), VSFRC)), 0))>;
+} // HasVSX, NoP9Vector
+
+// Any VSX subtarget that only has loads and stores that operate in big endian
+// order regardless of endianness. These are really the pre-Power9 subtargets.
+let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
+ def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
- let Predicates = [IsLittleEndian, HasP8Vector] in {
- def : Pat<DWToSPExtractConv.BVU,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
- (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
- def : Pat<DWToSPExtractConv.BVS,
- (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
- (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
- def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
-
- // Elements in a register on a LE system are in order <3, 2, 1, 0>.
- // The store instructions store the second word from the left.
- // So to align element 3, we need to modulo-left-shift by 3 words.
- // Similar logic applies for elements 0 and 1.
- foreach Idx = [ [0,2], [1,1], [3,3] ] in {
- def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
- (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
- sub_64), xoaddr:$src)>;
- }
- }
+ // Stores.
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+} // HasVSX, HasOnlySwappingMemOps
- let Predicates = [HasP8Vector, IsLittleEndian, NoP9Vector] in {
- def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
- xoaddr:$src)>;
- def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
- (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
- }
-
- let Predicates = [IsLittleEndian, HasVSX] in {
- // Little endian, available on all targets with VSX
- def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
- (v2f64 (XXPERMDI
- (COPY_TO_REGCLASS $B, VSRC),
- (COPY_TO_REGCLASS $A, VSRC), 0))>;
- // Using VMRGEW to assemble the final vector would be a lower latency
- // solution. However, we choose to go with the slightly higher latency
- // XXPERMDI for 2 reasons:
- // 1. This is likely to occur in unrolled loops where regpressure is high,
- // so we want to use the latter as it has access to all 64 VSX registers.
- // 2. Using Altivec instructions in this sequence would likely cause the
- // allocation of Altivec registers even for the loads which in turn would
- // force the use of LXSIWZX for the loads, adding a cycle of latency to
- // each of the loads which would otherwise be able to use LFIWZX.
- def : Pat<(v4f32 (build_vector LoadFP.A, LoadFP.B, LoadFP.C, LoadFP.D)),
- (v4f32 (XXPERMDI (XXMRGHW MrgFP.LD32D, MrgFP.LD32C),
- (XXMRGHW MrgFP.LD32B, MrgFP.LD32A), 3))>;
- def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
- (VMRGEW MrgFP.AC, MrgFP.BD)>;
- def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
- DblToFlt.B0, DblToFlt.B1)),
- (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
-
- // Convert 4 doubles to a vector of ints.
- def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
- DblToInt.C, DblToInt.D)),
- (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
- def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
- DblToUInt.C, DblToUInt.D)),
- (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
- ExtDbl.B0S, ExtDbl.B1S)),
- (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
- def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
- ExtDbl.B0U, ExtDbl.B1U)),
- (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
- (f64 (fpextend (extractelt v4f32:$A, 1))))),
- (v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
- (f64 (fpextend (extractelt v4f32:$A, 0))))),
- (v2f64 (XXPERMDI (XVCVSPDP (XXMRGLW $A, $A)),
- (XVCVSPDP (XXMRGLW $A, $A)), 2))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
- (f64 (fpextend (extractelt v4f32:$A, 2))))),
- (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
- (f64 (fpextend (extractelt v4f32:$A, 3))))),
- (v2f64 (XVCVSPDP $A))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
- (f64 (fpextend (extractelt v4f32:$A, 3))))),
- (v2f64 (XVCVSPDP (XXMRGHW $A, $A)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
- (f64 (fpextend (extractelt v4f32:$A, 2))))),
- (v2f64 (XXPERMDI (XVCVSPDP (XXMRGHW $A, $A)),
- (XVCVSPDP (XXMRGHW $A, $A)), 2))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
- (f64 (fpextend (extractelt v4f32:$B, 0))))),
- (v2f64 (XVCVSPDP (XXSLDWI (XXPERMDI $B, $A, 3),
- (XXPERMDI $B, $A, 3), 1)))>;
- def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 3))),
- (f64 (fpextend (extractelt v4f32:$B, 3))))),
- (v2f64 (XVCVSPDP (XXPERMDI $B, $A, 0)))>;
- def : Pat<WToDPExtractConv.BV02S,
- (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
- def : Pat<WToDPExtractConv.BV13S,
- (v2f64 (XVCVSXWDP $A))>;
- def : Pat<WToDPExtractConv.BV02U,
- (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
- def : Pat<WToDPExtractConv.BV13U,
- (v2f64 (XVCVUXWDP $A))>;
- }
+// Big endian VSX subtarget that only has loads and stores that always operate
+// in big endian order. These are really the big endian pre-Power9 subtargets.
+let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
+ def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v4i32:$XT, xoaddr:$dst), (STXVW4X $XT, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+} // HasVSX, HasOnlySwappingMemOps, IsBigEndian
+
+// Any Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector] in {
+def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
+ (XXLEQV $A, $B)>;
+def : Pat<(f64 (extloadf32 xoaddr:$src)),
+ (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>;
+def : Pat<(f32 (fpround (f64 (extloadf32 xoaddr:$src)))),
+ (f32 (XFLOADf32 xoaddr:$src))>;
+def : Pat<(f64 (fpextend f32:$src)),
+ (COPY_TO_REGCLASS $src, VSFRC)>;
+
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGE)),
+ (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETUGT)),
+ (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+// Additional xsnmsubasp patterns: -a*b + c == -(a*b - c)
+def : Pat<(fma (fneg f32:$A), f32:$B, f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$B), f32:$C),
+ (XSNMSUBASP $C, $A, $B)>;
+
+// Instructions for converting float to i32 feeding a store.
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 4),
+ (STIWX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 4),
+ (STIWX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+
+def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (umax v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMAXUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (smin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINSD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+def : Pat<(v2i64 (umin v2i64:$src1, v2i64:$src2)),
+ (v2i64 (VMINUD (COPY_TO_REGCLASS $src1, VRRC),
+ (COPY_TO_REGCLASS $src2, VRRC)))>;
+
+def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
+ (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
+ (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
+ (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
+ (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
+} // HasVSX, HasP8Vector
+
+// Big endian Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in {
+def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS $S1, VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
- let Predicates = [HasDirectMove] in {
- // Endianness-neutral constant splat on P8 and newer targets. The reason
- // for this pattern is that on targets with direct moves, we don't expand
- // BUILD_VECTOR nodes for v4i32.
- def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
- immSExt5NonZero:$A, immSExt5NonZero:$A)),
- (v4i32 (VSPLTISW imm:$A))>;
- }
+// v4f32 scalar <-> vector conversions (BE)
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XSCVDPSPN $A))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN $S))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.BE_VARIABLE_FLOAT)>;
+
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+
+// LIWAX - This instruction is used for sign extending i32 -> i64.
+// LIWZX - This instruction will be emitted for i32, f32, and when
+// zero-extending i32 to i64 (zext i32 -> i64).
+def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+ (v4i32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+ (v4f32 (XXSLDWIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>;
+
+def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3),
+ (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3)))>;
+def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3),
+ (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3)))>;
+def : Pat<(store (i32 (extractelt v4i32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f32 (extractelt v4f32:$A, 1)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+// Elements in a register on a BE system are in order <0, 1, 2, 3>.
+// The store instructions store the second word from the left.
+// So to align element zero, we need to modulo-left-shift by 3 words.
+// Similar logic applies for elements 2 and 3.
+foreach Idx = [ [0,3], [2,1], [3,2] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+}
+} // HasVSX, HasP8Vector, IsBigEndian
+
+// Little endian Power8 VSX subtarget.
+let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in {
+def : Pat<DWToSPExtractConv.El0SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1SS1,
+ (f32 (XSCVSXDSP (COPY_TO_REGCLASS
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El0US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S1, $S1, 2), VSFRC)))>;
+def : Pat<DWToSPExtractConv.El1US1,
+ (f32 (XSCVUXDSP (COPY_TO_REGCLASS
+ (f64 (COPY_TO_REGCLASS $S1, VSRC)), VSFRC)))>;
- let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in {
- // Big endian integer vectors using direct moves.
- def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
- (v2i64 (XXPERMDI
- (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
- (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (XXPERMDI
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
- }
+// v4f32 scalar <-> vector conversions (LE)
+def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+ (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 0)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 1)),
+ (f32 (XSCVSPDPN (XXPERMDI $S, $S, 2)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 2)),
+ (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>;
+def : Pat<(f32 (vector_extract v4f32:$S, 3)),
+ (f32 (XSCVSPDPN $S))>;
+def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)),
+ (f32 VectorExtractions.LE_VARIABLE_FLOAT)>;
+
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 3))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 2))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 1))))>;
+def : Pat<(f32 (PPCfcfids (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVSPDPN (XVCVSXWSP (XXSPLTW $A, 0))))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 3)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 2)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 1)), VSFRC))>;
+def : Pat<(f64 (PPCfcfid (f64 (PPCmtvsra (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (COPY_TO_REGCLASS (XVCVSXWDP (XXSPLTW $A, 0)), VSFRC))>;
+
+// LIWAX - This instruction is used for sign extending i32 -> i64.
+// LIWZX - This instruction will be emitted for i32, f32, and when
+// zero-extending i32 to i64 (zext i32 -> i64).
+def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSFRC), 2))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+ (v4i32 (XXPERMDIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+ (v4f32 (XXPERMDIs
+ (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSFRC), 2))>;
+
+def : Pat<DWToSPExtractConv.BVU,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVUXDSP $S2), (XVCVUXDSP $S2), 3),
+ (XXSLDWI (XVCVUXDSP $S1), (XVCVUXDSP $S1), 3)))>;
+def : Pat<DWToSPExtractConv.BVS,
+ (v4f32 (VPKUDUM (XXSLDWI (XVCVSXDSP $S2), (XVCVSXDSP $S2), 3),
+ (XXSLDWI (XVCVSXDSP $S1), (XVCVSXDSP $S1), 3)))>;
+def : Pat<(store (i32 (extractelt v4i32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f32 (extractelt v4f32:$A, 2)), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+
+// Elements in a register on a LE system are in order <3, 2, 1, 0>.
+// The store instructions store the second word from the left.
+// So to align element 3, we need to modulo-left-shift by 3 words.
+// Similar logic applies for elements 0 and 1.
+foreach Idx = [ [0,2], [1,1], [3,3] ] in {
+ def : Pat<(store (i32 (extractelt v4i32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+ def : Pat<(store (f32 (extractelt v4f32:$A, !head(Idx))), xoaddr:$src),
+ (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
+ sub_64), xoaddr:$src)>;
+}
+} // HasVSX, HasP8Vector, IsLittleEndian
+
+// Big endian pre-Power9 VSX subtarget with Power8 vector support.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget with Power8 vector support.
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ xoaddr:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
+} // HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian
+
+// Any VSX target with direct moves.
+let Predicates = [HasVSX, HasDirectMove] in {
+// bitconvert f32 -> i32
+// (convert to 32-bit fp single, shift right 1 word, move to GPR)
+def : Pat<(i32 (bitconvert f32:$S)),
+ (i32 (MFVSRWZ (EXTRACT_SUBREG
+ (XXSLDWI (XSCVDPSPN $S), (XSCVDPSPN $S), 3),
+ sub_64)))>;
+// bitconvert i32 -> f32
+// (move to FPR, shift left 1 word, convert to 64-bit fp single)
+def : Pat<(f32 (bitconvert i32:$A)),
+ (f32 (XSCVSPDPN
+ (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>;
- let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
- // Little endian integer vectors using direct moves.
- def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
- (v2i64 (XXPERMDI
- (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
- (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (XXPERMDI
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
- (COPY_TO_REGCLASS
- (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
- }
+// bitconvert f64 -> i64
+// (move to GPR, nothing else needed)
+def : Pat<(i64 (bitconvert f64:$S)),
+ (i64 (MFVSRD $S))>;
- let Predicates = [HasP8Vector] in {
- def : Pat<(v1i128 (bitconvert (v16i8 immAllOnesV))),
- (v1i128 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v2i64 (bitconvert (v16i8 immAllOnesV))),
- (v2i64 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v8i16 (bitconvert (v16i8 immAllOnesV))),
- (v8i16 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- def : Pat<(v16i8 (bitconvert (v16i8 immAllOnesV))),
- (v16i8 (COPY_TO_REGCLASS(XXLEQVOnes), VSRC))>;
- }
+// bitconvert i64 -> f64
+// (move to FPR, nothing else needed)
+def : Pat<(f64 (bitconvert i64:$S)),
+ (f64 (MTVSRD $S))>;
- let Predicates = [HasP9Vector] in {
- // Endianness-neutral patterns for const splats with ISA 3.0 instructions.
- def : Pat<(v4i32 (scalar_to_vector i32:$A)),
- (v4i32 (MTVSRWS $A))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
- (v4i32 (MTVSRWS $A))>;
- def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
- immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
- (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
- def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
- (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
- (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
- def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
- def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
- (v4i32 (XXSPLTW (COPY_TO_REGCLASS
- (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
- def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
- (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddrX4:$A),
- VSFRC)), 0))>;
- def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
- (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
- (DFLOADf32 iaddrX4:$A),
- VSFRC)), 0))>;
- def : Pat<(v4f32 (PPCldsplat xoaddr:$A)),
- (v4f32 (LXVWSX xoaddr:$A))>;
- def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
- (v4i32 (LXVWSX xoaddr:$A))>;
- }
+// Rounding to integer.
+def : Pat<(i64 (lrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (lrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (llrint f64:$S)),
+ (i64 (MFVSRD (FCTID $S)))>;
+def : Pat<(i64 (llrint f32:$S)),
+ (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>;
+def : Pat<(i64 (lround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (lround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
+def : Pat<(i64 (llround f64:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI $S))))>;
+def : Pat<(i64 (llround f32:$S)),
+ (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>;
- let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
- def : Pat<(i64 (extractelt v2i64:$A, 1)),
- (i64 (MFVSRLD $A))>;
- // Better way to build integer vectors if we have MTVSRDD. Big endian.
- def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
- (v2i64 (MTVSRDD $rB, $rA))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (MTVSRDD
- (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
- (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
- }
+// Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead
+// of f64
+def : Pat<(v8i16 (PPCmtvsrz i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64))>;
+
+// Endianness-neutral constant splat on P8 and newer targets. The reason
+// for this pattern is that on targets with direct moves, we don't expand
+// BUILD_VECTOR nodes for v4i32.
+def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
+ immSExt5NonZero:$A, immSExt5NonZero:$A)),
+ (v4i32 (VSPLTISW imm:$A))>;
+} // HasVSX, HasDirectMove
+
+// Big endian VSX subtarget with direct moves: scalar_to_vector via MovesToVSR.BE_* and v2i64 element extraction.
+let Predicates = [HasVSX, HasDirectMove, IsBigEndian] in {
+// Scalar -> vector conversions (BE) for v16i8, v8i16, v4i32 and v2i64; each result is wrapped in SUBREG_TO_REG ... sub_64.
+def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>;
+def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>;
+def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>;
+def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>;
+
+// v2i64 vector -> scalar extractions (BE): element 0 uses the LE_DWORD_1 dag and element 1 uses LE_DWORD_0.
+def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), // Non-constant index.
+ (i64 VectorExtractions.BE_VARIABLE_DWORD)>;
+} // HasVSX, HasDirectMove, IsBigEndian
+
+// Little endian VSX subtarget with direct moves: scalar_to_vector via MovesToVSR.LE_* and v2i64 element extraction.
+let Predicates = [HasVSX, HasDirectMove, IsLittleEndian] in {
+ // Scalar -> vector conversions (LE) for v16i8, v8i16, v4i32 and v2i64; the v16i8 and v8i16 forms reuse LE_WORD_0 with a copy to VSRC.
+ def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+ (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+ (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>;
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 MovesToVSR.LE_WORD_0)>;
+ def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+ (v2i64 MovesToVSR.LE_DWORD_0)>;
+ // v2i64 vector -> scalar extractions (LE): element i uses the LE_DWORD_i dag.
+ def : Pat<(i64 (vector_extract v2i64:$S, 0)),
+ (i64 VectorExtractions.LE_DWORD_0)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, 1)),
+ (i64 VectorExtractions.LE_DWORD_1)>;
+ def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), // Non-constant index.
+ (i64 VectorExtractions.LE_VARIABLE_DWORD)>;
+} // HasVSX, HasDirectMove, IsLittleEndian
+
+// Big endian pre-P9 VSX subtarget with direct moves. A constant element index i selects the VectorExtractions.LE_* dag with the mirrored index.
+let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian] in {
+def : Pat<(i32 (vector_extract v16i8:$S, 0)), // v16i8 vector -> scalar extractions (BE): element i -> LE_BYTE_(15-i).
+ (i32 VectorExtractions.LE_BYTE_15)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_0)>;
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), // Non-constant index.
+ (i32 VectorExtractions.BE_VARIABLE_BYTE)>;
+
+// v8i16 vector -> scalar extractions (BE): element i -> LE_HALF_(7-i).
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), // Non-constant index.
+ (i32 VectorExtractions.BE_VARIABLE_HALF)>;
+
+// v4i32 vector -> scalar extractions (BE): element i -> LE_WORD_(3-i).
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), // Non-constant index.
+ (i32 VectorExtractions.BE_VARIABLE_WORD)>;
+} // HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian
+
+// Little endian pre-P9 VSX subtarget with direct moves. A constant element index i selects the VectorExtractions.LE_* dag with the same index.
+let Predicates = [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian] in {
+def : Pat<(i32 (vector_extract v16i8:$S, 0)), // v16i8 vector -> scalar extractions (LE): element i -> LE_BYTE_i.
+ (i32 VectorExtractions.LE_BYTE_0)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 VectorExtractions.LE_BYTE_1)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 VectorExtractions.LE_BYTE_2)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 VectorExtractions.LE_BYTE_3)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 VectorExtractions.LE_BYTE_4)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 VectorExtractions.LE_BYTE_5)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 VectorExtractions.LE_BYTE_6)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 VectorExtractions.LE_BYTE_7)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 VectorExtractions.LE_BYTE_8)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 VectorExtractions.LE_BYTE_9)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 VectorExtractions.LE_BYTE_10)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 VectorExtractions.LE_BYTE_11)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 VectorExtractions.LE_BYTE_12)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 VectorExtractions.LE_BYTE_13)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 VectorExtractions.LE_BYTE_14)>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 VectorExtractions.LE_BYTE_15)>;
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), // Non-constant index.
+ (i32 VectorExtractions.LE_VARIABLE_BYTE)>;
+
+// v8i16 vector -> scalar extractions (LE): element i -> LE_HALF_i.
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 VectorExtractions.LE_HALF_0)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 VectorExtractions.LE_HALF_1)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 VectorExtractions.LE_HALF_2)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 VectorExtractions.LE_HALF_3)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 VectorExtractions.LE_HALF_4)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 VectorExtractions.LE_HALF_5)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 VectorExtractions.LE_HALF_6)>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 VectorExtractions.LE_HALF_7)>;
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), // Non-constant index.
+ (i32 VectorExtractions.LE_VARIABLE_HALF)>;
+
+// v4i32 vector -> scalar extractions (LE): element i -> LE_WORD_i.
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 VectorExtractions.LE_WORD_0)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_1)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 VectorExtractions.LE_WORD_3)>;
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), // Non-constant index.
+ (i32 VectorExtractions.LE_VARIABLE_WORD)>;
+} // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian
+
+// Big endian pre-Power9 VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] in {
+// Big endian integer vectors using direct moves: MTVSRD each 64-bit half, then XXPERMDI with the first elements as the first operand.
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), // Word pairs (A,B) and (C,D) are packed with RLDIMI before the move.
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.B, AnyExts.A, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), // Splat of one i32: MTVSRWZ then XXSPLTW.
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian
+
+// Little endian pre-Power9 VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
+// Little endian integer vectors using direct moves: same shape as the big endian patterns, with the XXPERMDI operands and RLDIMI pairs swapped.
+def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)), // Word pairs (D,C) and (B,A) are packed with RLDIMI before the move.
+ (XXPERMDI
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.C, AnyExts.D, 32, 0)), VSRC),
+ (COPY_TO_REGCLASS
+ (MTVSRD (RLDIMI AnyExts.A, AnyExts.B, 32, 0)), VSRC), 0)>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)), // Splat of one i32: MTVSRWZ then XXSPLTW.
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+} // HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian
- let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
- def : Pat<(i64 (extractelt v2i64:$A, 0)),
- (i64 (MFVSRLD $A))>;
- // Better way to build integer vectors if we have MTVSRDD. Little endian.
- def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
- (v2i64 (MTVSRDD $rB, $rA))>;
- def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
- (MTVSRDD
- (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
- (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
- }
- // P9 Altivec instructions that can be used to build vectors.
- // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
- // with complexities of existing build vector patterns in this file.
- let Predicates = [HasP9Altivec, IsLittleEndian] in {
- def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
- (v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
- (v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
- HWordToWord.LE_A2, HWordToWord.LE_A3)),
- (v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
- ByteToWord.LE_A2, ByteToWord.LE_A3)),
- (v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
- (v2i64 (VEXTSB2D $A))>;
- }
+// Any Power9 VSX subtarget.
+let Predicates = [HasVSX, HasP9Vector] in {
+// Additional fnmsub patterns: -a*b + c == -(a*b - c). Either fneg'd multiplicand form selects XSNMSUBQP with $C as its first operand.
+def : Pat<(fma (fneg f128:$A), f128:$B, f128:$C), (XSNMSUBQP $C, $A, $B)>;
+def : Pat<(fma f128:$A, (fneg f128:$B), f128:$C), (XSNMSUBQP $C, $A, $B)>;
+
+def : Pat<(f128 (sint_to_fp i64:$src)), // Signed i64 -> QP: $src is copied to VFRC first.
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (sint_to_fp (i64 (PPCmfvsr f64:$src)))), // PPCmfvsr source: the f64 operand is converted directly.
+ (f128 (XSCVSDQP $src))>;
+def : Pat<(f128 (sint_to_fp (i32 (PPCmfvsr f64:$src)))), // i32 variant: VEXTSW2Ds is applied before the conversion.
+ (f128 (XSCVSDQP (VEXTSW2Ds $src)))>;
+def : Pat<(f128 (uint_to_fp i64:$src)), // Unsigned i64 -> QP: same shapes with XSCVUDQP.
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP $src))>;
+
+// Convert (Un)Signed Word -> QP, from an i32 value (MTVSRWA/MTVSRWZ) or from an i32 load (LIWAX/LIWZX).
+def : Pat<(f128 (sint_to_fp i32:$src)),
+ (f128 (XSCVSDQP (MTVSRWA $src)))>;
+def : Pat<(f128 (sint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVSDQP (LIWAX xoaddr:$src)))>;
+def : Pat<(f128 (uint_to_fp i32:$src)),
+ (f128 (XSCVUDQP (MTVSRWZ $src)))>;
+def : Pat<(f128 (uint_to_fp (i32 (load xoaddr:$src)))),
+ (f128 (XSCVUDQP (LIWZX xoaddr:$src)))>;
+
+// Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
+// separate pattern so that it can convert the input register class from
+// VRRC(v8i16) to VSRC.
+def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
+ (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
+
+// fnearbyint: use current rounding mode
+def : Pat<(f128 (fnearbyint f128:$vB)), (f128 (XSRQPI 0, $vB, 3))>;
+// fround: round to nearest, ties away from zero
+def : Pat<(f128 (fround f128:$vB)), (f128 (XSRQPI 0, $vB, 0))>;
+// ftrunc: round towards Zero
+def : Pat<(f128 (ftrunc f128:$vB)), (f128 (XSRQPI 1, $vB, 1))>;
+// fceil: round towards +Inf
+def : Pat<(f128 (fceil f128:$vB)), (f128 (XSRQPI 1, $vB, 2))>;
+// ffloor: round towards -Inf
+def : Pat<(f128 (ffloor f128:$vB)), (f128 (XSRQPI 1, $vB, 3))>;
+// frint: use current rounding mode, with Inexact (XSRQPIX instead of XSRQPI, same operands as fnearbyint)
+def : Pat<(f128 (frint f128:$vB)), (f128 (XSRQPIX 0, $vB, 3))>;
+
+def : Pat<(f128 (int_ppc_scalar_insert_exp_qp f128:$vA, i64:$vB)), // Insert exponent: the i64 operand goes through MTVSRD.
+ (f128 (XSIEXPQP $vA, (MTVSRD $vB)))>;
+
+def : Pat<(i64 (int_ppc_scalar_extract_expq f128:$vA)), // Extract exponent: sub_64 of the XSXEXPQP result is moved out with MFVSRD.
+ (i64 (MFVSRD (EXTRACT_SUBREG
+ (v2i64 (XSXEXPQP $vA)), sub_64)))>;
+
+// Extra patterns expanding to vector Extract Word/Insert Word (the xxinsertw and xxextractuw intrinsics).
+def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
+ (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
+def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
+ (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
+
+// Vector Reverse: bswap of v8i16/v1i128 via XXBRH/XXBRQ, copying the operand to VSRC and the result back to VRRC.
+def : Pat<(v8i16 (bswap v8i16 :$A)),
+ (v8i16 (COPY_TO_REGCLASS (XXBRH (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+def : Pat<(v1i128 (bswap v1i128 :$A)),
+ (v1i128 (COPY_TO_REGCLASS (XXBRQ (COPY_TO_REGCLASS $A, VSRC)), VRRC))>;
+
+// D-Form Load/Store: quadwOffsetLoad/Store and the lxvw4x/lxvd2x/stxvw4x/stxvd2x intrinsics on iaddrX16 addresses select LXV/STXV.
+def : Pat<(v4i32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v4f32 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2i64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2f64 (quadwOffsetLoad iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(f128 (quadwOffsetLoad iaddrX16:$src)), // f128 result is copied to VRRC.
+ (COPY_TO_REGCLASS (LXV memrix16:$src), VRRC)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddrX16:$src)), (LXV memrix16:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddrX16:$src)), (LXV memrix16:$src)>;
+
+def : Pat<(quadwOffsetStore v4f32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v4i32:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v2f64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(quadwOffsetStore f128:$rS, iaddrX16:$dst), // f128 source is copied to VSRC.
+ (STXV (COPY_TO_REGCLASS $rS, VSRC), memrix16:$dst)>;
+def : Pat<(quadwOffsetStore v2i64:$rS, iaddrX16:$dst), (STXV $rS, memrix16:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddrX16:$dst),
+ (STXV $rS, memrix16:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddrX16:$dst),
+ (STXV $rS, memrix16:$dst)>;
+
+def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>; // From here on: xoaddr addresses select LXVX/STXVX.
+def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+def : Pat<(f128 (nonQuadwOffsetLoad xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVX xoaddr:$src), VRRC)>;
+def : Pat<(nonQuadwOffsetStore f128:$rS, xoaddr:$dst),
+ (STXVX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+
+// Build vectors from i8 loads (LXSIBZX); the sign-extending SELi8* forms add VEXTSB2Ws/VEXTSB2Ds.
+def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
+ (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
+def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
+ (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
+def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
+ (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
+def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
+ (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
+def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
+ (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
+def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
+ (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
+
+// Build vectors from i16 loads (LXSIHZX); the sign-extending SELi16* forms add VEXTSH2Ws/VEXTSH2Ds.
+def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
+ (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
+def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
+ (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
+def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
+ (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
+def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
+ (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
+def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
+ (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
+
+// Load/convert and convert/store patterns for f16: LXSIHZX + XSCVHPDP on load, XSCVDPHP + STXSIHX on store; f32 adds a register-class copy.
+def : Pat<(f64 (extloadf16 xoaddr:$src)),
+ (f64 (XSCVHPDP (LXSIHZX xoaddr:$src)))>;
+def : Pat<(truncstoref16 f64:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP $src), xoaddr:$dst)>;
+def : Pat<(f32 (extloadf16 xoaddr:$src)),
+ (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX xoaddr:$src)), VSSRC))>;
+def : Pat<(truncstoref16 f32:$src, xoaddr:$dst),
+ (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), xoaddr:$dst)>;
+
+// Vector sign extensions: PPCVexts with constant 1 selects VEXTSB2Ds, with constant 2 selects VEXTSH2Ds.
+def : Pat<(f64 (PPCVexts f64:$A, 1)),
+ (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
+def : Pat<(f64 (PPCVexts f64:$A, 2)),
+ (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+
+def : Pat<(f64 (extloadf32 iaddrX4:$src)), // Extending f32 load on an iaddrX4 address: DFLOADf32, copied to VSFRC.
+ (COPY_TO_REGCLASS (DFLOADf32 iaddrX4:$src), VSFRC)>;
+def : Pat<(f32 (fpround (f64 (extloadf32 iaddrX4:$src)))), // Extend-then-round of the same load selects the plain f32 load.
+ (f32 (DFLOADf32 iaddrX4:$src))>;
+
+def : Pat<(v4f32 (PPCldvsxlh xaddr:$src)), // PPCldvsxlh: a 64-bit load (XFLOADf64/DFLOADf64) copied to VSRC.
+ (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC)>;
+def : Pat<(v4f32 (PPCldvsxlh iaddrX4:$src)),
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC)>;
+
+// Convert (Un)Signed DWord in memory -> QP: xaddrX4 addresses load with LXSDX, iaddrX4 addresses with LXSD.
+def : Pat<(f128 (sint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSDX xaddrX4:$src)))>;
+def : Pat<(f128 (sint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVSDQP (LXSD iaddrX4:$src)))>;
+def : Pat<(f128 (uint_to_fp (i64 (load xaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSDX xaddrX4:$src)))>;
+def : Pat<(f128 (uint_to_fp (i64 (load iaddrX4:$src)))),
+ (f128 (XSCVUDQP (LXSD iaddrX4:$src)))>;
+
+// Convert Unsigned HWord in memory -> QP. $src is referenced as xoaddr, like every other ScalarLoads.* / LXSIHZX pattern in this block.
+def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi16)),
+ (f128 (XSCVUDQP (LXSIHZX xoaddr:$src)))>;
+
+// Convert Unsigned Byte in memory -> QP (LXSIBZX load feeding XSCVUDQP).
+def : Pat<(f128 (uint_to_fp ScalarLoads.ZELi8)),
+ (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>;
+
+// Truncate & Convert QP -> (Un)Signed (D)Word. i64 results are moved out with MFVRD; i32 results with MFVSRWZ after a copy to VFRC.
+def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>;
+def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>;
+def : Pat<(i32 (fp_to_sint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>;
+def : Pat<(i32 (fp_to_uint f128:$src)),
+ (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
+
+// Instructions for store(fptosi). The trailing constant 8/4/2/1 picks STXSD(X)/STXSIWX/STXSIHX/STXSIBX in the patterns below.
+// The 8-byte version is repeated here due to availability of D-Form STXSD.
+def : Pat<(PPCstore_scal_int_from_vsr // f128 sources: XSCVQPSDZ/XSCVQPSWZ result copied to VFRC before the store.
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xaddrX4:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), iaddrX4:$dst, 8),
+ (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 4),
+ (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 2),
+ (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), xoaddr:$dst, 1),
+ (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr // f64 sources: XSCVDPSXDS/XSCVDPSXWS; no 4-byte form appears in this group.
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPSXDS f64:$src), xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPSXDS f64:$src), iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 2),
+ (STXSIHX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), xoaddr:$dst, 1),
+ (STXSIBX (XSCVDPSXWS f64:$src), xoaddr:$dst)>;
+
+// Instructions for store(fptoui). The trailing constant 8/4/2/1 picks STXSD(X)/STXSIWX/STXSIHX/STXSIBX in the patterns below.
+def : Pat<(PPCstore_scal_int_from_vsr // f128 sources: XSCVQPUDZ/XSCVQPUWZ result copied to VFRC before the store.
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xaddrX4:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), iaddrX4:$dst, 8),
+ (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 4),
+ (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 2),
+ (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), xoaddr:$dst, 1),
+ (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr // f64 sources: XSCVDPUXDS/XSCVDPUXWS; no 4-byte form appears in this group.
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xaddrX4:$dst, 8),
+ (STXSDX (XSCVDPUXDS f64:$src), xaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), iaddrX4:$dst, 8),
+ (STXSD (XSCVDPUXDS f64:$src), iaddrX4:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 2),
+ (STXSIHX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
+ (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+
+// Round & Convert QP -> DP/SP. The f32 case goes through XSCVQPDPO and then XSRSP.
+def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
+def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
+
+// Convert SP -> QP (the f32 source is copied to VFRC and fed to XSCVDPQP).
+def : Pat<(f128 (fpextend f32:$src)),
+ (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>;
+
+def : Pat<(f32 (PPCxsmaxc f32:$XA, f32:$XB)), // f32 max via XSMAXCDP; operands and result are copied to VSSRC.
+ (f32 (COPY_TO_REGCLASS (XSMAXCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+def : Pat<(f32 (PPCxsminc f32:$XA, f32:$XB)), // f32 min via XSMINCDP, same shape.
+ (f32 (COPY_TO_REGCLASS (XSMINCDP (COPY_TO_REGCLASS $XA, VSSRC),
+ (COPY_TO_REGCLASS $XB, VSSRC)),
+ VSSRC))>;
+
+// Endianness-neutral patterns for const splats with ISA 3.0 instructions (MTVSRWS, XXSPLTIB, LXVWSX).
+def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (MTVSRWS $A))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (v4i32 (MTVSRWS $A))>;
+def : Pat<(v16i8 (build_vector immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A, // Same immediate in all 16 lanes -> XXSPLTIB.
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A,
+ immNonAllOneAnyExt8:$A, immNonAllOneAnyExt8:$A)),
+ (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
+def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), // Load-and-splat with LXVWSX, then a vector conversion.
+ (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
+def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
+ (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
+def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), // DFLOADf64 load, scalar conversion, then XXSPLTW.
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPSXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
+def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPUXWS (DFLOADf64 iaddrX4:$A)), VSRC), 1))>;
+def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), // DFLOADf32 load copied to VSFRC, scalar conversion, then XXPERMDIs.
+ (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
+ (DFLOADf32 iaddrX4:$A),
+ VSFRC)), 0))>;
+def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
+ (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
+ (DFLOADf32 iaddrX4:$A),
+ VSFRC)), 0))>;
+def : Pat<(v4f32 (PPCldsplat xoaddr:$A)), // PPCldsplat of a word selects LXVWSX directly.
+ (v4f32 (LXVWSX xoaddr:$A))>;
+def : Pat<(v4i32 (PPCldsplat xoaddr:$A)),
+ (v4i32 (LXVWSX xoaddr:$A))>;
+} // HasVSX, HasP9Vector
+
+// Big endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+
+// Scalar stores of i8
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i16
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+
+def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+ (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+
+def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
+def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+ (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSRC))>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+
+// (Un)Signed DWord vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+
+// (Un)Signed Word vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 1)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
+foreach Idx = [0,2,3] in {
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSW2D (VSPLTW Idx, $src)), sub_64)))>;
+}
+foreach Idx = 0-3 in {
+ def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, Idx)))),
+ (f128 (XSCVUDQP (XXEXTRACTUW $src, !shl(Idx, 2))))>;
+}
- let Predicates = [HasP9Altivec, IsBigEndian] in {
- def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
- (v2i64 (VEXTSW2D $A))>;
- def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
- (v2i64 (VEXTSH2D $A))>;
- def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
- HWordToWord.BE_A2, HWordToWord.BE_A3)),
- (v4i32 (VEXTSH2W $A))>;
- def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
- ByteToWord.BE_A2, ByteToWord.BE_A3)),
- (v4i32 (VEXTSB2W $A))>;
- def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
- (v2i64 (VEXTSB2D $A))>;
- }
+// (Un)Signed HWord vector extract -> QP
+foreach Idx = 0-7 in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v8i16:$src, Idx), i16)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSH2D (VEXTRACTUH !add(Idx, Idx), $src)),
+ sub_64)))>;
+ // The SDAG adds the `and` since an `i16` is being extracted as an `i32`.
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, Idx)), 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !add(Idx, Idx), $src), sub_64)))>;
}
-// Put this P9Altivec related definition here since it's possible to be
-// selected to VSX instruction xvnegsp, avoid possible undef.
-let Predicates = [HasP9Altivec] in {
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = 0-15 in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg (vector_extract v16i8:$src, Idx),
+ i8)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB Idx, $src)), sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, Idx)), 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (VEXTRACTUB Idx, $src), sub_64)))>;
+}
- def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
- (v4i32 (VABSDUW $A, $B))>;
+// Unsigned int in vsx register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
+} // HasVSX, HasP9Vector, IsBigEndian
+
+// Little endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 8)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 2)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 4)))>;
+def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
+ (f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 8))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 2)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>;
+def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
+ (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
+
+def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>;
+def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst),
+ (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)),
+ (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>;
+def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst),
+ (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i8
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 7)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 5)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 3)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 1)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 15)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 13)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 11)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 9)), VSRC), xoaddr:$dst)>;
+
+// Scalar stores of i16
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 4)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 2)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS $S, VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 14)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 12)), VSRC), xoaddr:$dst)>;
+def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 10)), VSRC), xoaddr:$dst)>;
+
+def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
+
+def : Pat<(v2f64 (scalar_to_vector (f64 (load iaddrX4:$src)))),
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSFRC), 2))>;
+def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddrX4:$src)))),
+ (v2f64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XFLOADf64 xaddrX4:$src), VSFRC), 2))>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xaddrX4:$src),
+ (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2),
+ sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 0)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
+ iaddrX4:$src)>;
+def : Pat<(store (i64 (extractelt v2i64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+def : Pat<(store (f64 (extractelt v2f64:$A, 1)), iaddrX4:$src),
+ (DFSTOREf64 (EXTRACT_SUBREG $A, sub_64), iaddrX4:$src)>;
+
+// (Un)Signed DWord vector extract -> QP
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (sint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVSDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 0)))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG (XXPERMDI $src, $src, 3), sub_64)))>;
+def : Pat<(f128 (uint_to_fp (i64 (extractelt v2i64:$src, 1)))),
+ (f128 (XSCVUDQP (COPY_TO_REGCLASS $src, VFRC)))>;
+
+// (Un)Signed Word vector extract -> QP
+foreach Idx = [[0,3],[1,2],[3,0]] in {
+ def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG
+ (VEXTSW2D (VSPLTW !head(!tail(Idx)), $src)),
+ sub_64)))>;
+}
+def : Pat<(f128 (sint_to_fp (i32 (extractelt v4i32:$src, 2)))),
+ (f128 (XSCVSDQP (EXTRACT_SUBREG (VEXTSW2D $src), sub_64)))>;
- def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
- (v8i16 (VABSDUH $A, $B))>;
+foreach Idx = [[0,12],[1,8],[2,4],[3,0]] in {
+ def : Pat<(f128 (uint_to_fp (i32 (extractelt v4i32:$src, !head(Idx))))),
+ (f128 (XSCVUDQP (XXEXTRACTUW $src, !head(!tail(Idx)))))>;
+}
- def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
- (v16i8 (VABSDUB $A, $B))>;
+// (Un)Signed HWord vector extract -> QP
+// Each entry of the nested foreach list pairs a vector element index with
+// its corresponding register byte location.
+foreach Idx = [[0,14],[1,12],[2,10],[3,8],[4,6],[5,4],[6,2],[7,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v8i16:$src, !head(Idx)), i16)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG (VEXTSH2D
+ (VEXTRACTUH !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v8i16:$src, !head(Idx))),
+ 65535))),
+ (f128 (XSCVUDQP (EXTRACT_SUBREG
+ (VEXTRACTUH !head(!tail(Idx)), $src), sub_64)))>;
+}
- // As PPCVABSD description, the last operand indicates whether do the
- // sign bit flip.
- def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
- (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+// (Un)Signed Byte vector extract -> QP
+foreach Idx = [[0,15],[1,14],[2,13],[3,12],[4,11],[5,10],[6,9],[7,8],[8,7],
+ [9,6],[10,5],[11,4],[12,3],[13,2],[14,1],[15,0]] in {
+ def : Pat<(f128 (sint_to_fp
+ (i32 (sext_inreg
+ (vector_extract v16i8:$src, !head(Idx)), i8)))),
+ (f128 (XSCVSDQP
+ (EXTRACT_SUBREG
+ (VEXTSB2D (VEXTRACTUB !head(!tail(Idx)), $src)),
+ sub_64)))>;
+ def : Pat<(f128 (uint_to_fp
+ (and (i32 (vector_extract v16i8:$src, !head(Idx))),
+ 255))),
+ (f128 (XSCVUDQP
+ (EXTRACT_SUBREG
+ (VEXTRACTUB !head(!tail(Idx)), $src), sub_64)))>;
}
+
+// Unsigned int in vsx register -> QP
+def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
+ (f128 (XSCVUDQP
+ (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 8)))>;
+} // HasVSX, HasP9Vector, IsLittleEndian
+
+// Any Power9 VSX subtarget that supports Power9 Altivec.
+let Predicates = [HasVSX, HasP9Altivec] in {
+// These P9Altivec-related patterns are placed here because one of them
+// selects the VSX instruction xvnegsp; this avoids a possible undef.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+ (v4i32 (VABSDUW $A, $B))>;
+
+def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+ (v8i16 (VABSDUH $A, $B))>;
+
+def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+ (v16i8 (VABSDUB $A, $B))>;
+
+// Per the PPCVABSD description, the last operand indicates whether to do
+// the sign bit flip.
+def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+ (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
+} // HasVSX, HasP9Altivec
+
+// Big endian Power9 VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsBigEndian] in {
+def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBLX $Idx, $S)>;
+
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHLX (LI8 0), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHLX (LI8 2), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHLX (LI8 4), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHLX (LI8 6), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHLX (LI8 8), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHLX (LI8 10), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHLX (LI8 12), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHLX (LI8 14), $S)>;
+
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWLX (LI8 0), $S)>;
+
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (VEXTUWLX (LI8 8), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWLX (LI8 12), $S)>;
+
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWLX (LI8 0), $S))>;
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (VEXTUWLX (LI8 8), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWLX (LI8 12), $S))>;
+
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX $Idx, $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 1), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 3), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 5), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 7), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 9), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 11), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 (EXTRACT_SUBREG (VEXTUBLX (LI8 15), $S), sub_32))>;
+
+// i32 extraction of a v8i16 element through VEXTUHLX. A variable index is
+// first passed through RLWINM8 $Idx, 1, 28, 30; each constant element index N
+// (0-7) is materialized as LI8 2*N (0, 2, ..., 14).
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX
+ (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUHLX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX
+ (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 0), $S), sub_32))>;
+// For extracting BE word 1, MFVSRWZ is better than VEXTUWLX
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUWLX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+// with complexities of existing build vector patterns in this file.
+def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1,
+ HWordToWord.BE_A2, HWordToWord.BE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1,
+ ByteToWord.BE_A2, ByteToWord.BE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsBigEndian
+
+// Little endian Power9 VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in {
+def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
+ (VEXTUBRX $Idx, $S)>;
+
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))),
+ (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))),
+ (VEXTUHRX (LI8 0), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))),
+ (VEXTUHRX (LI8 2), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))),
+ (VEXTUHRX (LI8 4), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))),
+ (VEXTUHRX (LI8 6), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))),
+ (VEXTUHRX (LI8 8), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))),
+ (VEXTUHRX (LI8 10), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))),
+ (VEXTUHRX (LI8 12), $S)>;
+def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))),
+ (VEXTUHRX (LI8 14), $S)>;
+
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))),
+ (VEXTUWRX (LI8 0), $S)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))),
+ (VEXTUWRX (LI8 4), $S)>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32)>;
+def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))),
+ (VEXTUWRX (LI8 12), $S)>;
+
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))),
+ (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))),
+ (EXTSW (VEXTUWRX (LI8 0), $S))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))),
+ (EXTSW (VEXTUWRX (LI8 4), $S))>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))),
+ (EXTSW (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 VectorExtractions.LE_WORD_2), sub_32))>;
+def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))),
+ (EXTSW (VEXTUWRX (LI8 12), $S))>;
+
+def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX $Idx, $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 1), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 3), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 5), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 7), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 8)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 9)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 9), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 10)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 11)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 11), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 12)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 13)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 13), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 14)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 14), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v16i8:$S, 15)),
+ (i32 (EXTRACT_SUBREG (VEXTUBRX (LI8 15), $S), sub_32))>;
+
+// i32 extraction of a v8i16 element through VEXTUHRX. A variable index is
+// first passed through RLWINM8 $Idx, 1, 28, 30; each constant element index N
+// (0-7) is materialized as LI8 2*N (0, 2, ..., 14).
+def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX
+ (RLWINM8 $Idx, 1, 28, 30), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 2), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 2)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 4), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 6), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 4)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 8), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 5)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 10), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 6)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 12), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v8i16:$S, 7)),
+ (i32 (EXTRACT_SUBREG (VEXTUHRX (LI8 14), $S), sub_32))>;
+
+def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX
+ (RLWINM8 $Idx, 2, 28, 29), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 0)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 0), $S), sub_32))>;
+def : Pat<(i32 (vector_extract v4i32:$S, 1)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 4), $S), sub_32))>;
+// For extracting LE word 2, MFVSRWZ is better than VEXTUWRX
+def : Pat<(i32 (vector_extract v4i32:$S, 2)),
+ (i32 VectorExtractions.LE_WORD_2)>;
+def : Pat<(i32 (vector_extract v4i32:$S, 3)),
+ (i32 (EXTRACT_SUBREG (VEXTUWRX (LI8 12), $S), sub_32))>;
+
+// P9 Altivec instructions that can be used to build vectors.
+// Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete
+// with complexities of existing build vector patterns in this file.
+def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)),
+ (v2i64 (VEXTSW2D $A))>;
+def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)),
+ (v2i64 (VEXTSH2D $A))>;
+def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1,
+ HWordToWord.LE_A2, HWordToWord.LE_A3)),
+ (v4i32 (VEXTSH2W $A))>;
+def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1,
+ ByteToWord.LE_A2, ByteToWord.LE_A3)),
+ (v4i32 (VEXTSB2W $A))>;
+def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)),
+ (v2i64 (VEXTSB2D $A))>;
+} // HasVSX, HasP9Altivec, IsLittleEndian
+
+// Big endian VSX subtarget that supports additional direct moves from ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] in {
+def : Pat<(i64 (extractelt v2i64:$A, 1)),
+ (i64 (MFVSRLD $A))>;
+// Better way to build integer vectors if we have MTVSRDD. Big endian.
+def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (MTVSRDD
+ (RLDIMI AnyExts.B, AnyExts.A, 32, 0),
+ (RLDIMI AnyExts.D, AnyExts.C, 32, 0))>;
+
+def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
+ (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian
+
+// Little endian VSX subtarget that supports direct moves from ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in {
+def : Pat<(i64 (extractelt v2i64:$A, 0)),
+ (i64 (MFVSRLD $A))>;
+// Better way to build integer vectors if we have MTVSRDD. Little endian.
+def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (MTVSRDD
+ (RLDIMI AnyExts.C, AnyExts.D, 32, 0),
+ (RLDIMI AnyExts.A, AnyExts.B, 32, 0))>;
+
+def : Pat<(f128 (PPCbuild_fp128 i64:$rA, i64:$rB)),
+ (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
+} // HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian
+} // AddedComplexity = 400
+
+//---------------------------- Instruction aliases ---------------------------//
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
+def : InstAlias<"mfvrd $rA, $XT",
+ (MFVRD g8rc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprd $rA, $src",
+ (MFVSRD g8rc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrd $XT, $rA",
+ (MTVRD vrrc:$XT, g8rc:$rA), 0>;
+def : InstAlias<"mtfprd $dst, $rA",
+ (MTVSRD f8rc:$dst, g8rc:$rA)>;
+def : InstAlias<"mfvrwz $rA, $XT",
+ (MFVRWZ gprc:$rA, vrrc:$XT), 0>;
+def : InstAlias<"mffprwz $rA, $src",
+ (MFVSRWZ gprc:$rA, f8rc:$src)>;
+def : InstAlias<"mtvrwa $XT, $rA",
+ (MTVRWA vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwa $dst, $rA",
+ (MTVSRWA f8rc:$dst, gprc:$rA)>;
+def : InstAlias<"mtvrwz $XT, $rA",
+ (MTVRWZ vrrc:$XT, gprc:$rA), 0>;
+def : InstAlias<"mtfprwz $dst, $rA",
+ (MTVSRWZ f8rc:$dst, gprc:$rA)>;
More information about the llvm-commits
mailing list