[llvm] r334898 - [X86] More additions to the load folding tables based on the autogenerated tables.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 16 16:25:50 PDT 2018
Author: ctopper
Date: Sat Jun 16 16:25:50 2018
New Revision: 334898
URL: http://llvm.org/viewvc/llvm-project?rev=334898&view=rev
Log:
[X86] More additions to the load folding tables based on the autogenerated tables.
This also includes more NotMemoryFoldable annotations, which remove some entries from the autogenerated table.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrExtension.td
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86InstrXOP.td
llvm/trunk/utils/TableGen/X86FoldTablesEmitter.cpp
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=334898&r1=334897&r2=334898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Jun 16 16:25:50 2018
@@ -1959,7 +1959,7 @@ multiclass WriteFVarBlendask<bits<8> opc
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_KZ, Sched<[sched]>;
+ []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
@@ -1978,7 +1978,7 @@ multiclass WriteFVarBlendask<bits<8> opc
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
}
}
}
@@ -1999,7 +1999,7 @@ multiclass WriteFVarBlendask_rmb<bits<8>
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
@@ -2097,7 +2097,7 @@ multiclass avx512_cmp_scalar<X86VectorVT
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
- Sched<[sched]>;
+ Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2105,14 +2105,14 @@ multiclass avx512_cmp_scalar<X86VectorVT
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
- EVEX_4V, EVEX_B, Sched<[sched]>;
+ EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
}// let isAsmParserOnly = 1, hasSideEffects = 0
let isCodeGenOnly = 1 in {
@@ -2334,20 +2334,20 @@ multiclass avx512_icmp_cc<bits<8> opc, s
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched]>;
+ EVEX_4V, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
"$dst, $src1, $src2, $cc}"), []>,
- EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched]>;
+ EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
@@ -2355,7 +2355,8 @@ multiclass avx512_icmp_cc<bits<8> opc, s
!strconcat("vpcmp", Suffix,
"\t{$cc, $src2, $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, $src2, $cc}"), []>,
- EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>,
+ NotMemoryFoldable;
}
def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
@@ -2404,14 +2405,16 @@ multiclass avx512_icmp_cc_rmb<bits<8> op
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
"$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ NotMemoryFoldable;
def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
_.ScalarMemOp:$src2, u8imm:$cc),
!strconcat("vpcmp", Suffix,
"\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
"$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
- EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ NotMemoryFoldable;
}
def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
@@ -2523,7 +2526,7 @@ multiclass avx512_vcmp_common<X86Foldabl
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched]>;
+ Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in {
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
@@ -2531,7 +2534,8 @@ multiclass avx512_vcmp_common<X86Foldabl
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc">,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, ReadAfterLd]>,
+ NotMemoryFoldable;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2539,7 +2543,8 @@ multiclass avx512_vcmp_common<X86Foldabl
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr##", $cc">,
- EVEX_B, Sched<[sched.Folded, ReadAfterLd]>;
+ EVEX_B, Sched<[sched.Folded, ReadAfterLd]>,
+ NotMemoryFoldable;
}
}
@@ -2589,7 +2594,7 @@ multiclass avx512_vcmp_sae<X86FoldableSc
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc">,
- EVEX_B, Sched<[sched]>;
+ EVEX_B, Sched<[sched]>, NotMemoryFoldable;
}
}
@@ -6289,7 +6294,7 @@ def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrc
(ins VR128X:$src1, VR128X:$src2),
"vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
- Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
+ Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
Modified: llvm/trunk/lib/Target/X86/X86InstrExtension.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrExtension.td?rev=334898&r1=334897&r2=334898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrExtension.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrExtension.td Sat Jun 16 16:25:50 2018
@@ -95,17 +95,17 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (out
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVSX16rr16: I<0xBF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
- []>, TB, OpSize16, Sched<[WriteALU]>;
+ []>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
- []>, TB, OpSize16, Sched<[WriteALU]>;
+ []>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
let mayLoad = 1 in {
def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
- []>, OpSize16, TB, Sched<[WriteALULd]>;
+ []>, OpSize16, TB, Sched<[WriteALULd]>, NotMemoryFoldable;
def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
- []>, TB, OpSize16, Sched<[WriteALULd]>;
+ []>, TB, OpSize16, Sched<[WriteALULd]>, NotMemoryFoldable;
} // mayLoad = 1
} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=334898&r1=334897&r2=334898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Jun 16 16:25:50 2018
@@ -1003,6 +1003,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrm, 0 },
{ X86::VEXP2PDZr, X86::VEXP2PDZm, 0 },
{ X86::VEXP2PSZr, X86::VEXP2PSZm, 0 },
+ { X86::VEXPANDPDZrr, X86::VEXPANDPDZrm, TB_NO_REVERSE },
+ { X86::VEXPANDPSZrr, X86::VEXPANDPSZrm, TB_NO_REVERSE },
{ X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0 },
{ X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrm, 0 },
{ X86::VFPCLASSSDrr, X86::VFPCLASSSDrm, TB_NO_REVERSE },
@@ -1043,6 +1045,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
{ X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
{ X86::VPERMQZri, X86::VPERMQZmi, 0 },
+ { X86::VPEXPANDBZrr, X86::VPEXPANDBZrm, TB_NO_REVERSE },
+ { X86::VPEXPANDDZrr, X86::VPEXPANDDZrm, TB_NO_REVERSE },
+ { X86::VPEXPANDQZrr, X86::VPEXPANDQZrm, TB_NO_REVERSE },
+ { X86::VPEXPANDWZrr, X86::VPEXPANDWZrm, TB_NO_REVERSE },
{ X86::VPLZCNTDZrr, X86::VPLZCNTDZrm, 0 },
{ X86::VPLZCNTQZrr, X86::VPLZCNTQZrm, 0 },
{ X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
@@ -1129,6 +1135,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rm, 0 },
{ X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rm, 0 },
{ X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rm, 0 },
+ { X86::VEXPANDPDZ256rr, X86::VEXPANDPDZ256rm, TB_NO_REVERSE },
+ { X86::VEXPANDPSZ256rr, X86::VEXPANDPSZ256rm, TB_NO_REVERSE },
{ X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0 },
{ X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rm, 0 },
{ X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0 },
@@ -1162,6 +1170,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
{ X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
{ X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 },
+ { X86::VPEXPANDBZ256rr, X86::VPEXPANDBZ256rm, TB_NO_REVERSE },
+ { X86::VPEXPANDDZ256rr, X86::VPEXPANDDZ256rm, TB_NO_REVERSE },
+ { X86::VPEXPANDQZ256rr, X86::VPEXPANDQZ256rm, TB_NO_REVERSE },
+ { X86::VPEXPANDWZ256rr, X86::VPEXPANDWZ256rm, TB_NO_REVERSE },
{ X86::VPLZCNTDZ256rr, X86::VPLZCNTDZ256rm, 0 },
{ X86::VPLZCNTQZ256rr, X86::VPLZCNTQZ256rm, 0 },
{ X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE },
@@ -1238,6 +1250,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rm, 0 },
{ X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rm, 0 },
{ X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rm, 0 },
+ { X86::VEXPANDPDZ128rr, X86::VEXPANDPDZ128rm, TB_NO_REVERSE },
+ { X86::VEXPANDPSZ128rr, X86::VEXPANDPSZ128rm, TB_NO_REVERSE },
{ X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0 },
{ X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rm, 0 },
{ X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0 },
@@ -1269,6 +1283,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPCONFLICTQZ128rr, X86::VPCONFLICTQZ128rm, 0 },
{ X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
{ X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
+ { X86::VPEXPANDBZ128rr, X86::VPEXPANDBZ128rm, TB_NO_REVERSE },
+ { X86::VPEXPANDDZ128rr, X86::VPEXPANDDZ128rm, TB_NO_REVERSE },
+ { X86::VPEXPANDQZ128rr, X86::VPEXPANDQZ128rm, TB_NO_REVERSE },
+ { X86::VPEXPANDWZ128rr, X86::VPEXPANDWZ128rm, TB_NO_REVERSE },
{ X86::VPLZCNTDZ128rr, X86::VPLZCNTDZ128rm, 0 },
{ X86::VPLZCNTQZ128rr, X86::VPLZCNTQZ128rm, 0 },
{ X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
@@ -1966,6 +1984,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
{ X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 },
{ X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
+ { X86::VPCLMULQDQYrr, X86::VPCLMULQDQYrm, 0 },
{ X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
{ X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
{ X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
@@ -2258,6 +2277,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
{ X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
{ X86::VPBLENDMWZrr, X86::VPBLENDMWZrm, 0 },
+ { X86::VPCLMULQDQZrr, X86::VPCLMULQDQZrm, 0 },
{ X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
{ X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
{ X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
@@ -2522,6 +2542,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPBLENDMQZ256rr, X86::VPBLENDMQZ256rm, 0 },
{ X86::VPBLENDMWZ128rr, X86::VPBLENDMWZ128rm, 0 },
{ X86::VPBLENDMWZ256rr, X86::VPBLENDMWZ256rm, 0 },
+ { X86::VPCLMULQDQZ128rr, X86::VPCLMULQDQZ128rm, 0 },
+ { X86::VPCLMULQDQZ256rr, X86::VPCLMULQDQZ256rm, 0 },
{ X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
{ X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
{ X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
@@ -2772,18 +2794,66 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
// AVX-512 masked foldable instructions
+ { X86::VBROADCASTF32X2Zrkz,X86::VBROADCASTF32X2Zmkz,TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Zrkz,X86::VBROADCASTI32X2Zmkz,TB_NO_REVERSE },
{ X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
{ X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmkz, 0 },
+ { X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmkz, 0 },
+ { X86::VCVTPD2DQZrrkz, X86::VCVTPD2DQZrmkz, 0 },
+ { X86::VCVTPD2PSZrrkz, X86::VCVTPD2PSZrmkz, 0 },
+ { X86::VCVTPD2QQZrrkz, X86::VCVTPD2QQZrmkz, 0 },
+ { X86::VCVTPD2UDQZrrkz, X86::VCVTPD2UDQZrmkz, 0 },
+ { X86::VCVTPD2UQQZrrkz, X86::VCVTPD2UQQZrmkz, 0 },
+ { X86::VCVTPH2PSZrrkz, X86::VCVTPH2PSZrmkz, 0 },
+ { X86::VCVTPS2DQZrrkz, X86::VCVTPS2DQZrmkz, 0 },
+ { X86::VCVTPS2PDZrrkz, X86::VCVTPS2PDZrmkz, 0 },
+ { X86::VCVTPS2QQZrrkz, X86::VCVTPS2QQZrmkz, 0 },
+ { X86::VCVTPS2UDQZrrkz, X86::VCVTPS2UDQZrmkz, 0 },
+ { X86::VCVTPS2UQQZrrkz, X86::VCVTPS2UQQZrmkz, 0 },
+ { X86::VCVTQQ2PDZrrkz, X86::VCVTQQ2PDZrmkz, 0 },
+ { X86::VCVTQQ2PSZrrkz, X86::VCVTQQ2PSZrmkz, 0 },
+ { X86::VCVTTPD2DQZrrkz, X86::VCVTTPD2DQZrmkz, 0 },
+ { X86::VCVTTPD2QQZrrkz, X86::VCVTTPD2QQZrmkz, 0 },
+ { X86::VCVTTPD2UDQZrrkz, X86::VCVTTPD2UDQZrmkz, 0 },
+ { X86::VCVTTPD2UQQZrrkz, X86::VCVTTPD2UQQZrmkz, 0 },
+ { X86::VCVTTPS2DQZrrkz, X86::VCVTTPS2DQZrmkz, 0 },
+ { X86::VCVTTPS2QQZrrkz, X86::VCVTTPS2QQZrmkz, 0 },
+ { X86::VCVTTPS2UDQZrrkz, X86::VCVTTPS2UDQZrmkz, 0 },
+ { X86::VCVTTPS2UQQZrrkz, X86::VCVTTPS2UQQZrmkz, 0 },
+ { X86::VCVTUDQ2PDZrrkz, X86::VCVTUDQ2PDZrmkz, 0 },
+ { X86::VCVTUDQ2PSZrrkz, X86::VCVTUDQ2PSZrmkz, 0 },
+ { X86::VCVTUQQ2PDZrrkz, X86::VCVTUQQ2PDZrmkz, 0 },
+ { X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmkz, 0 },
+ { X86::VEXP2PDZrkz, X86::VEXP2PDZmkz, 0 },
+ { X86::VEXP2PSZrkz, X86::VEXP2PSZmkz, 0 },
+ { X86::VEXPANDPDZrrkz, X86::VEXPANDPDZrmkz, TB_NO_REVERSE },
+ { X86::VEXPANDPSZrrkz, X86::VEXPANDPSZrmkz, TB_NO_REVERSE },
+ { X86::VGETEXPPDZrkz, X86::VGETEXPPDZmkz, 0 },
+ { X86::VGETEXPPSZrkz, X86::VGETEXPPSZmkz, 0 },
+ { X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmikz, 0 },
+ { X86::VGETMANTPSZrrikz, X86::VGETMANTPSZrmikz, 0 },
+ { X86::VMOVDDUPZrrkz, X86::VMOVDDUPZrmkz, 0 },
+ { X86::VMOVSHDUPZrrkz, X86::VMOVSHDUPZrmkz, 0 },
+ { X86::VMOVSLDUPZrrkz, X86::VMOVSLDUPZrmkz, 0 },
{ X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
{ X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
{ X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
{ X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
+ { X86::VPBROADCASTBZrkz, X86::VPBROADCASTBZmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZrkz, X86::VPBROADCASTDZmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZrkz, X86::VPBROADCASTQZmkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZrkz, X86::VPBROADCASTWZmkz, TB_NO_REVERSE },
{ X86::VPCONFLICTDZrrkz, X86::VPCONFLICTDZrmkz, 0 },
{ X86::VPCONFLICTQZrrkz, X86::VPCONFLICTQZrmkz, 0 },
{ X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
{ X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
{ X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
{ X86::VPERMQZrikz, X86::VPERMQZmikz, 0 },
+ { X86::VPEXPANDBZrrkz, X86::VPEXPANDBZrmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDDZrrkz, X86::VPEXPANDDZrmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDQZrrkz, X86::VPEXPANDQZrmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDWZrrkz, X86::VPEXPANDWZrmkz, TB_NO_REVERSE },
{ X86::VPLZCNTDZrrkz, X86::VPLZCNTDZrmkz, 0 },
{ X86::VPLZCNTQZrrkz, X86::VPLZCNTQZrmkz, 0 },
{ X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
@@ -2802,6 +2872,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 },
{ X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 },
{ X86::VPOPCNTWZrrkz, X86::VPOPCNTWZrmkz, 0 },
+ { X86::VPROLDZrikz, X86::VPROLDZmikz, 0 },
+ { X86::VPROLQZrikz, X86::VPROLQZmikz, 0 },
+ { X86::VPRORDZrikz, X86::VPRORDZmikz, 0 },
+ { X86::VPRORQZrikz, X86::VPRORQZmikz, 0 },
{ X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
{ X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
{ X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
@@ -2814,20 +2888,80 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 },
{ X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 },
{ X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 },
+ { X86::VRCP14PDZrkz, X86::VRCP14PDZmkz, 0 },
+ { X86::VRCP14PSZrkz, X86::VRCP14PSZmkz, 0 },
+ { X86::VRCP28PDZrkz, X86::VRCP28PDZmkz, 0 },
+ { X86::VRCP28PSZrkz, X86::VRCP28PSZmkz, 0 },
+ { X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmikz, 0 },
+ { X86::VREDUCEPSZrrikz, X86::VREDUCEPSZrmikz, 0 },
+ { X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmikz, 0 },
+ { X86::VRNDSCALEPSZrrikz, X86::VRNDSCALEPSZrmikz, 0 },
+ { X86::VRSQRT14PDZrkz, X86::VRSQRT14PDZmkz, 0 },
+ { X86::VRSQRT14PSZrkz, X86::VRSQRT14PSZmkz, 0 },
+ { X86::VRSQRT28PDZrkz, X86::VRSQRT28PDZmkz, 0 },
+ { X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmkz, 0 },
+ { X86::VSQRTPDZrkz, X86::VSQRTPDZmkz, 0 },
+ { X86::VSQRTPSZrkz, X86::VSQRTPSZmkz, 0 },
// AVX-512VL 256-bit masked foldable instructions
- { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTF32X2Z256rkz,X86::VBROADCASTF32X2Z256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z256rkz,X86::VBROADCASTI32X2Z256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmkz, 0 },
+ { X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmkz, 0 },
+ { X86::VCVTPD2DQZ256rrkz, X86::VCVTPD2DQZ256rmkz, 0 },
+ { X86::VCVTPD2PSZ256rrkz, X86::VCVTPD2PSZ256rmkz, 0 },
+ { X86::VCVTPD2QQZ256rrkz, X86::VCVTPD2QQZ256rmkz, 0 },
+ { X86::VCVTPD2UDQZ256rrkz,X86::VCVTPD2UDQZ256rmkz, 0 },
+ { X86::VCVTPD2UQQZ256rrkz,X86::VCVTPD2UQQZ256rmkz, 0 },
+ { X86::VCVTPH2PSZ256rrkz, X86::VCVTPH2PSZ256rmkz, 0 },
+ { X86::VCVTPS2DQZ256rrkz, X86::VCVTPS2DQZ256rmkz, 0 },
+ { X86::VCVTPS2PDZ256rrkz, X86::VCVTPS2PDZ256rmkz, 0 },
+ { X86::VCVTPS2QQZ256rrkz, X86::VCVTPS2QQZ256rmkz, 0 },
+ { X86::VCVTPS2UDQZ256rrkz,X86::VCVTPS2UDQZ256rmkz, 0 },
+ { X86::VCVTPS2UQQZ256rrkz,X86::VCVTPS2UQQZ256rmkz, 0 },
+ { X86::VCVTQQ2PDZ256rrkz, X86::VCVTQQ2PDZ256rmkz, 0 },
+ { X86::VCVTQQ2PSZ256rrkz, X86::VCVTQQ2PSZ256rmkz, 0 },
+ { X86::VCVTTPD2DQZ256rrkz, X86::VCVTTPD2DQZ256rmkz, 0 },
+ { X86::VCVTTPD2QQZ256rrkz, X86::VCVTTPD2QQZ256rmkz, 0 },
+ { X86::VCVTTPD2UDQZ256rrkz,X86::VCVTTPD2UDQZ256rmkz,0 },
+ { X86::VCVTTPD2UQQZ256rrkz,X86::VCVTTPD2UQQZ256rmkz,0 },
+ { X86::VCVTTPS2DQZ256rrkz, X86::VCVTTPS2DQZ256rmkz, 0 },
+ { X86::VCVTTPS2QQZ256rrkz, X86::VCVTTPS2QQZ256rmkz, 0 },
+ { X86::VCVTTPS2UDQZ256rrkz,X86::VCVTTPS2UDQZ256rmkz,0 },
+ { X86::VCVTTPS2UQQZ256rrkz,X86::VCVTTPS2UQQZ256rmkz,0 },
+ { X86::VCVTUDQ2PDZ256rrkz, X86::VCVTUDQ2PDZ256rmkz, 0 },
+ { X86::VCVTUDQ2PSZ256rrkz, X86::VCVTUDQ2PSZ256rmkz, 0 },
+ { X86::VCVTUQQ2PDZ256rrkz, X86::VCVTUQQ2PDZ256rmkz, 0 },
+ { X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmkz, 0 },
+ { X86::VEXPANDPDZ256rrkz, X86::VEXPANDPDZ256rmkz, TB_NO_REVERSE },
+ { X86::VEXPANDPSZ256rrkz, X86::VEXPANDPSZ256rmkz, TB_NO_REVERSE },
+ { X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mkz, 0 },
+ { X86::VGETEXPPSZ256rkz, X86::VGETEXPPSZ256mkz, 0 },
+ { X86::VGETMANTPDZ256rrikz,X86::VGETMANTPDZ256rmikz,0 },
+ { X86::VGETMANTPSZ256rrikz,X86::VGETMANTPSZ256rmikz,0 },
+ { X86::VMOVDDUPZ256rrkz, X86::VMOVDDUPZ256rmkz, 0 },
+ { X86::VMOVSHDUPZ256rrkz, X86::VMOVSHDUPZ256rmkz, 0 },
+ { X86::VMOVSLDUPZ256rrkz, X86::VMOVSLDUPZ256rmkz, 0 },
{ X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
{ X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
{ X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
{ X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
+ { X86::VPBROADCASTBZ256rkz, X86::VPBROADCASTBZ256mkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ256rkz, X86::VPBROADCASTDZ256mkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ256rkz, X86::VPBROADCASTQZ256mkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ256rkz, X86::VPBROADCASTWZ256mkz, TB_NO_REVERSE },
{ X86::VPCONFLICTDZ256rrkz, X86::VPCONFLICTDZ256rmkz, 0 },
{ X86::VPCONFLICTQZ256rrkz, X86::VPCONFLICTQZ256rmkz, 0 },
{ X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
{ X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
{ X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
{ X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 },
+ { X86::VPEXPANDBZ256rrkz, X86::VPEXPANDBZ256rmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDDZ256rrkz, X86::VPEXPANDDZ256rmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDQZ256rrkz, X86::VPEXPANDQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDWZ256rrkz, X86::VPEXPANDWZ256rmkz, TB_NO_REVERSE },
{ X86::VPLZCNTDZ256rrkz, X86::VPLZCNTDZ256rmkz, 0 },
{ X86::VPLZCNTQZ256rrkz, X86::VPLZCNTQZ256rmkz, 0 },
{ X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE },
@@ -2846,6 +2980,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPOPCNTDZ256rrkz, X86::VPOPCNTDZ256rmkz, 0 },
{ X86::VPOPCNTQZ256rrkz, X86::VPOPCNTQZ256rmkz, 0 },
{ X86::VPOPCNTWZ256rrkz, X86::VPOPCNTWZ256rmkz, 0 },
+ { X86::VPROLDZ256rikz, X86::VPROLDZ256mikz, 0 },
+ { X86::VPROLQZ256rikz, X86::VPROLQZ256mikz, 0 },
+ { X86::VPRORDZ256rikz, X86::VPRORDZ256mikz, 0 },
+ { X86::VPRORQZ256rikz, X86::VPRORQZ256mikz, 0 },
{ X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
{ X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
{ X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
@@ -2858,17 +2996,72 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 },
{ X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 },
{ X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 },
+ { X86::VRCP14PDZ256rkz, X86::VRCP14PDZ256mkz, 0 },
+ { X86::VRCP14PSZ256rkz, X86::VRCP14PSZ256mkz, 0 },
+ { X86::VREDUCEPDZ256rrikz,X86::VREDUCEPDZ256rmikz, 0 },
+ { X86::VREDUCEPSZ256rrikz,X86::VREDUCEPSZ256rmikz, 0 },
+ { X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmikz, 0 },
+ { X86::VRNDSCALEPSZ256rrikz, X86::VRNDSCALEPSZ256rmikz, 0 },
+ { X86::VRSQRT14PDZ256rkz, X86::VRSQRT14PDZ256mkz, 0 },
+ { X86::VRSQRT14PSZ256rkz, X86::VRSQRT14PSZ256mkz, 0 },
+ { X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mkz, 0 },
+ { X86::VSQRTPSZ256rkz, X86::VSQRTPSZ256mkz, 0 },
// AVX-512VL 128-bit masked foldable instructions
- { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z128rkz,X86::VBROADCASTI32X2Z128mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmkz, TB_NO_REVERSE },
+ { X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmkz, 0 },
+ { X86::VCVTPD2DQZ128rrkz, X86::VCVTPD2DQZ128rmkz, 0 },
+ { X86::VCVTPD2PSZ128rrkz, X86::VCVTPD2PSZ128rmkz, 0 },
+ { X86::VCVTPD2QQZ128rrkz, X86::VCVTPD2QQZ128rmkz, 0 },
+ { X86::VCVTPD2UDQZ128rrkz,X86::VCVTPD2UDQZ128rmkz, 0 },
+ { X86::VCVTPD2UQQZ128rrkz,X86::VCVTPD2UQQZ128rmkz, 0 },
+ { X86::VCVTPH2PSZ128rrkz, X86::VCVTPH2PSZ128rmkz, TB_NO_REVERSE },
+ { X86::VCVTPS2DQZ128rrkz, X86::VCVTPS2DQZ128rmkz, 0 },
+ { X86::VCVTPS2PDZ128rrkz, X86::VCVTPS2PDZ128rmkz, TB_NO_REVERSE },
+ { X86::VCVTPS2QQZ128rrkz, X86::VCVTPS2QQZ128rmkz, TB_NO_REVERSE },
+ { X86::VCVTPS2UDQZ128rrkz,X86::VCVTPS2UDQZ128rmkz, 0 },
+ { X86::VCVTPS2UQQZ128rrkz,X86::VCVTPS2UQQZ128rmkz, TB_NO_REVERSE },
+ { X86::VCVTQQ2PDZ128rrkz, X86::VCVTQQ2PDZ128rmkz, 0 },
+ { X86::VCVTQQ2PSZ128rrkz, X86::VCVTQQ2PSZ128rmkz, 0 },
+ { X86::VCVTTPD2DQZ128rrkz, X86::VCVTTPD2DQZ128rmkz, 0 },
+ { X86::VCVTTPD2QQZ128rrkz, X86::VCVTTPD2QQZ128rmkz, 0 },
+ { X86::VCVTTPD2UDQZ128rrkz,X86::VCVTTPD2UDQZ128rmkz,0 },
+ { X86::VCVTTPD2UQQZ128rrkz,X86::VCVTTPD2UQQZ128rmkz,0 },
+ { X86::VCVTTPS2DQZ128rrkz, X86::VCVTTPS2DQZ128rmkz, 0 },
+ { X86::VCVTTPS2QQZ128rrkz, X86::VCVTTPS2QQZ128rmkz, TB_NO_REVERSE },
+ { X86::VCVTTPS2UDQZ128rrkz,X86::VCVTTPS2UDQZ128rmkz,0 },
+ { X86::VCVTTPS2UQQZ128rrkz,X86::VCVTTPS2UQQZ128rmkz,TB_NO_REVERSE },
+ { X86::VCVTUDQ2PDZ128rrkz, X86::VCVTUDQ2PDZ128rmkz, TB_NO_REVERSE },
+ { X86::VCVTUDQ2PSZ128rrkz, X86::VCVTUDQ2PSZ128rmkz, 0 },
+ { X86::VCVTUQQ2PDZ128rrkz, X86::VCVTUQQ2PDZ128rmkz, 0 },
+ { X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmkz, 0 },
+ { X86::VEXPANDPDZ128rrkz, X86::VEXPANDPDZ128rmkz, TB_NO_REVERSE },
+ { X86::VEXPANDPSZ128rrkz, X86::VEXPANDPSZ128rmkz, TB_NO_REVERSE },
+ { X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mkz, 0 },
+ { X86::VGETEXPPSZ128rkz, X86::VGETEXPPSZ128mkz, 0 },
+ { X86::VGETMANTPDZ128rrikz,X86::VGETMANTPDZ128rmikz,0 },
+ { X86::VGETMANTPSZ128rrikz,X86::VGETMANTPSZ128rmikz,0 },
+ { X86::VMOVDDUPZ128rrkz, X86::VMOVDDUPZ128rmkz, TB_NO_REVERSE },
+ { X86::VMOVSHDUPZ128rrkz, X86::VMOVSHDUPZ128rmkz, 0 },
+ { X86::VMOVSLDUPZ128rrkz, X86::VMOVSLDUPZ128rmkz, 0 },
{ X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
{ X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
{ X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
{ X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
+ { X86::VPBROADCASTBZ128rkz, X86::VPBROADCASTBZ128mkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ128rkz, X86::VPBROADCASTDZ128mkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ128rkz, X86::VPBROADCASTQZ128mkz, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ128rkz, X86::VPBROADCASTWZ128mkz, TB_NO_REVERSE },
{ X86::VPCONFLICTDZ128rrkz, X86::VPCONFLICTDZ128rmkz, 0 },
{ X86::VPCONFLICTQZ128rrkz, X86::VPCONFLICTQZ128rmkz, 0 },
{ X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
{ X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
+ { X86::VPEXPANDBZ128rrkz, X86::VPEXPANDBZ128rmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDDZ128rrkz, X86::VPEXPANDDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDQZ128rrkz, X86::VPEXPANDQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPEXPANDWZ128rrkz, X86::VPEXPANDWZ128rmkz, TB_NO_REVERSE },
{ X86::VPLZCNTDZ128rrkz, X86::VPLZCNTDZ128rmkz, 0 },
{ X86::VPLZCNTQZ128rrkz, X86::VPLZCNTQZ128rmkz, 0 },
{ X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
@@ -2887,6 +3080,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPOPCNTDZ128rrkz, X86::VPOPCNTDZ128rmkz, 0 },
{ X86::VPOPCNTQZ128rrkz, X86::VPOPCNTQZ128rmkz, 0 },
{ X86::VPOPCNTWZ128rrkz, X86::VPOPCNTWZ128rmkz, 0 },
+ { X86::VPROLDZ128rikz, X86::VPROLDZ128mikz, 0 },
+ { X86::VPROLQZ128rikz, X86::VPROLQZ128mikz, 0 },
+ { X86::VPRORDZ128rikz, X86::VPRORDZ128mikz, 0 },
+ { X86::VPRORQZ128rikz, X86::VPRORQZ128mikz, 0 },
{ X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
{ X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
{ X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
@@ -2899,6 +3096,59 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 },
{ X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 },
{ X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 },
+ { X86::VRCP14PDZ128rkz, X86::VRCP14PDZ128mkz, 0 },
+ { X86::VRCP14PSZ128rkz, X86::VRCP14PSZ128mkz, 0 },
+ { X86::VREDUCEPDZ128rrikz,X86::VREDUCEPDZ128rmikz, 0 },
+ { X86::VREDUCEPSZ128rrikz,X86::VREDUCEPSZ128rmikz, 0 },
+ { X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmikz, 0 },
+ { X86::VRNDSCALEPSZ128rrikz, X86::VRNDSCALEPSZ128rmikz, 0 },
+ { X86::VRSQRT14PDZ128rkz, X86::VRSQRT14PDZ128mkz, 0 },
+ { X86::VRSQRT14PSZ128rkz, X86::VRSQRT14PSZ128mkz, 0 },
+ { X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mkz, 0 },
+ { X86::VSQRTPSZ128rkz, X86::VSQRTPSZ128mkz, 0 },
+
+ // AVX512 masked move/load instructions. These use TB_NO_REVERSE to prevent
+ // unfolding creating an unmasked load.
+ { X86::VMOVAPDZ128rrkz, X86::VMOVAPDZ128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVAPDZ256rrkz, X86::VMOVAPDZ256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVAPDZrrkz, X86::VMOVAPDZrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVAPSZ128rrkz, X86::VMOVAPSZ128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVAPSZ256rrkz, X86::VMOVAPSZ256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVAPSZrrkz, X86::VMOVAPSZrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVDQA32Z128rrkz, X86::VMOVDQA32Z128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVDQA32Z256rrkz, X86::VMOVDQA32Z256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVDQA32Zrrkz, X86::VMOVDQA32Zrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVDQA64Z128rrkz, X86::VMOVDQA64Z128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVDQA64Z256rrkz, X86::VMOVDQA64Z256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVDQA64Zrrkz, X86::VMOVDQA64Zrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVDQU16Z128rrkz, X86::VMOVDQU16Z128rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU16Z256rrkz, X86::VMOVDQU16Z256rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU16Zrrkz, X86::VMOVDQU16Zrmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU32Z128rrkz, X86::VMOVDQU32Z128rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU32Z256rrkz, X86::VMOVDQU32Z256rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU32Zrrkz, X86::VMOVDQU32Zrmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU64Z128rrkz, X86::VMOVDQU64Z128rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU64Z256rrkz, X86::VMOVDQU64Z256rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU64Zrrkz, X86::VMOVDQU64Zrmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU8Z128rrkz, X86::VMOVDQU8Z128rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU8Z256rrkz, X86::VMOVDQU8Z256rmkz, TB_NO_REVERSE },
+ { X86::VMOVDQU8Zrrkz, X86::VMOVDQU8Zrmkz, TB_NO_REVERSE },
+ { X86::VMOVUPDZ128rrkz, X86::VMOVUPDZ128rmkz, TB_NO_REVERSE },
+ { X86::VMOVUPDZ256rrkz, X86::VMOVUPDZ256rmkz, TB_NO_REVERSE },
+ { X86::VMOVUPDZrrkz, X86::VMOVUPDZrmkz, TB_NO_REVERSE },
+ { X86::VMOVUPSZ128rrkz, X86::VMOVUPSZ128rmkz, TB_NO_REVERSE },
+ { X86::VMOVUPSZ256rrkz, X86::VMOVUPSZ256rmkz, TB_NO_REVERSE },
+ { X86::VMOVUPSZrrkz, X86::VMOVUPSZrmkz, TB_NO_REVERSE },
+
+ // AVX512 masked FPCLASS instructions.
+ { X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmk, 0 },
+ { X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmk, 0 },
+ { X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmk, 0 },
+ { X86::VFPCLASSPSZ128rrk, X86::VFPCLASSPSZ128rmk, 0 },
+ { X86::VFPCLASSPSZ256rrk, X86::VFPCLASSPSZ256rmk, 0 },
+ { X86::VFPCLASSPSZrrk, X86::VFPCLASSPSZrmk, 0 },
+ { X86::VFPCLASSSDrrk, X86::VFPCLASSSDrmk, TB_NO_REVERSE },
+ { X86::VFPCLASSSSrrk, X86::VFPCLASSSSrmk, TB_NO_REVERSE },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
@@ -3118,10 +3368,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 },
{ X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 },
{ X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
+ { X86::VCVTSD2SSZrr_Intkz, X86::VCVTSD2SSZrm_Intkz, TB_NO_REVERSE },
+ { X86::VCVTSS2SDZrr_Intkz, X86::VCVTSS2SDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDBPSADBWZrrikz, X86::VDBPSADBWZrmikz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
{ X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
+ { X86::VGETEXPSDZrkz, X86::VGETEXPSDZmkz, TB_NO_REVERSE },
+ { X86::VGETEXPSSZrkz, X86::VGETEXPSSZmkz, TB_NO_REVERSE },
+ { X86::VGETMANTSDZrrikz, X86::VGETMANTSDZrmikz, TB_NO_REVERSE },
+ { X86::VGETMANTSSZrrikz, X86::VGETMANTSSZrmikz, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
{ X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
{ X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
@@ -3167,6 +3424,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
{ X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 },
{ X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 },
+ { X86::VPBROADCASTBZrk, X86::VPBROADCASTBZmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZrk, X86::VPBROADCASTDZmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZrk, X86::VPBROADCASTQZmk, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZrk, X86::VPBROADCASTWZmk, TB_NO_REVERSE },
{ X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
{ X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
{ X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
@@ -3193,13 +3454,28 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 },
{ X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 },
{ X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 },
+ { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 },
+ { X86::VPMULHRSWZrrkz, X86::VPMULHRSWZrmkz, 0 },
+ { X86::VPMULHUWZrrkz, X86::VPMULHUWZrmkz, 0 },
+ { X86::VPMULHWZrrkz, X86::VPMULHWZrmkz, 0 },
{ X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 },
{ X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 },
{ X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 },
- { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 },
+ { X86::VPMULTISHIFTQBZrrkz, X86::VPMULTISHIFTQBZrmkz, 0 },
{ X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 },
{ X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
{ X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
+ { X86::VPROLVDZrrkz, X86::VPROLVDZrmkz, 0 },
+ { X86::VPROLVQZrrkz, X86::VPROLVQZrmkz, 0 },
+ { X86::VPRORVDZrrkz, X86::VPRORVDZrmkz, 0 },
+ { X86::VPRORVQZrrkz, X86::VPRORVQZrmkz, 0 },
+ { X86::VPSHLDDZrrikz, X86::VPSHLDDZrmikz, 0 },
+ { X86::VPSHLDQZrrikz, X86::VPSHLDQZrmikz, 0 },
+ { X86::VPSHLDWZrrikz, X86::VPSHLDWZrmikz, 0 },
+ { X86::VPSHRDDZrrikz, X86::VPSHRDDZrmikz, 0 },
+ { X86::VPSHRDQZrrikz, X86::VPSHRDQZrmikz, 0 },
+ { X86::VPSHRDWZrrikz, X86::VPSHRDWZrmikz, 0 },
+ { X86::VPSHUFBITQMBZrrk, X86::VPSHUFBITQMBZrmk, 0 },
{ X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
{ X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
{ X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
@@ -3237,12 +3513,34 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
{ X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
{ X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
+ { X86::VRANGEPDZrrikz, X86::VRANGEPDZrmikz, 0 },
+ { X86::VRANGEPSZrrikz, X86::VRANGEPSZrmikz, 0 },
+ { X86::VRANGESDZrrikz, X86::VRANGESDZrmikz, TB_NO_REVERSE },
+ { X86::VRANGESSZrrikz, X86::VRANGESSZrmikz, TB_NO_REVERSE },
+ { X86::VRCP14SDZrrkz, X86::VRCP14SDZrmkz, TB_NO_REVERSE },
+ { X86::VRCP14SSZrrkz, X86::VRCP14SSZrmkz, TB_NO_REVERSE },
+ { X86::VRCP28SDZrkz, X86::VRCP28SDZmkz, TB_NO_REVERSE },
+ { X86::VRCP28SSZrkz, X86::VRCP28SSZmkz, TB_NO_REVERSE },
+ { X86::VREDUCESDZrrikz, X86::VREDUCESDZrmikz, TB_NO_REVERSE },
+ { X86::VREDUCESSZrrikz, X86::VREDUCESSZrmikz, TB_NO_REVERSE },
+ { X86::VRNDSCALESDZr_Intkz, X86::VRNDSCALESDZm_Intkz, TB_NO_REVERSE },
+ { X86::VRNDSCALESSZr_Intkz, X86::VRNDSCALESSZm_Intkz, TB_NO_REVERSE },
+ { X86::VRSQRT14SDZrrkz, X86::VRSQRT14SDZrmkz, TB_NO_REVERSE },
+ { X86::VRSQRT14SSZrrkz, X86::VRSQRT14SSZrmkz, TB_NO_REVERSE },
+ { X86::VRSQRT28SDZrkz, X86::VRSQRT28SDZmkz, TB_NO_REVERSE },
+ { X86::VRSQRT28SSZrkz, X86::VRSQRT28SSZmkz, TB_NO_REVERSE },
+ { X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmkz, 0 },
+ { X86::VSCALEFPSZrrkz, X86::VSCALEFPSZrmkz, 0 },
+ { X86::VSCALEFSDZrrkz, X86::VSCALEFSDZrmkz, TB_NO_REVERSE },
+ { X86::VSCALEFSSZrrkz, X86::VSCALEFSSZrmkz, TB_NO_REVERSE },
{ X86::VSHUFF32X4Zrrikz, X86::VSHUFF32X4Zrmikz, 0 },
{ X86::VSHUFF64X2Zrrikz, X86::VSHUFF64X2Zrmikz, 0 },
{ X86::VSHUFI32X4Zrrikz, X86::VSHUFI32X4Zrmikz, 0 },
{ X86::VSHUFI64X2Zrrikz, X86::VSHUFI64X2Zrmikz, 0 },
{ X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 },
{ X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 },
+ { X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE },
+ { X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
{ X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
{ X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
@@ -3263,6 +3561,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 },
{ X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 },
{ X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
+ { X86::VDBPSADBWZ256rrikz, X86::VDBPSADBWZ256rmikz, 0 },
{ X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
{ X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
{ X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
@@ -3300,6 +3599,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
{ X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 },
{ X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 },
+ { X86::VPBROADCASTBZ256rk, X86::VPBROADCASTBZ256mk, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ256rk, X86::VPBROADCASTDZ256mk, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ256rk, X86::VPBROADCASTQZ256mk, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ256rk, X86::VPBROADCASTWZ256mk, TB_NO_REVERSE },
{ X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
{ X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
{ X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
@@ -3327,12 +3630,27 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 },
{ X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 },
{ X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 },
+ { X86::VPMULHRSWZ256rrkz, X86::VPMULHRSWZ256rmkz, 0 },
+ { X86::VPMULHUWZ256rrkz, X86::VPMULHUWZ256rmkz, 0 },
+ { X86::VPMULHWZ256rrkz, X86::VPMULHWZ256rmkz, 0 },
{ X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 },
{ X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 },
{ X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 },
+ { X86::VPMULTISHIFTQBZ256rrkz,X86::VPMULTISHIFTQBZ256rmkz,0 },
{ X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 },
{ X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
{ X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
+ { X86::VPROLVDZ256rrkz, X86::VPROLVDZ256rmkz, 0 },
+ { X86::VPROLVQZ256rrkz, X86::VPROLVQZ256rmkz, 0 },
+ { X86::VPRORVDZ256rrkz, X86::VPRORVDZ256rmkz, 0 },
+ { X86::VPRORVQZ256rrkz, X86::VPRORVQZ256rmkz, 0 },
+ { X86::VPSHLDDZ256rrikz, X86::VPSHLDDZ256rmikz, 0 },
+ { X86::VPSHLDQZ256rrikz, X86::VPSHLDQZ256rmikz, 0 },
+ { X86::VPSHLDWZ256rrikz, X86::VPSHLDWZ256rmikz, 0 },
+ { X86::VPSHRDDZ256rrikz, X86::VPSHRDDZ256rmikz, 0 },
+ { X86::VPSHRDQZ256rrikz, X86::VPSHRDQZ256rmikz, 0 },
+ { X86::VPSHRDWZ256rrikz, X86::VPSHRDWZ256rmikz, 0 },
+ { X86::VPSHUFBITQMBZ256rrk, X86::VPSHUFBITQMBZ256rmk, 0 },
{ X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
{ X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
{ X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
@@ -3370,6 +3688,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
{ X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
{ X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
+ { X86::VRANGEPDZ256rrikz, X86::VRANGEPDZ256rmikz, 0 },
+ { X86::VRANGEPSZ256rrikz, X86::VRANGEPSZ256rmikz, 0 },
+ { X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmkz, 0 },
+ { X86::VSCALEFPSZ256rrkz, X86::VSCALEFPSZ256rmkz, 0 },
{ X86::VSHUFF32X4Z256rrikz, X86::VSHUFF32X4Z256rmikz, 0 },
{ X86::VSHUFF64X2Z256rrikz, X86::VSHUFF64X2Z256rmikz, 0 },
{ X86::VSHUFI32X4Z256rrikz, X86::VSHUFI32X4Z256rmikz, 0 },
@@ -3394,6 +3716,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 },
{ X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 },
{ X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 },
+ { X86::VDBPSADBWZ128rrikz, X86::VDBPSADBWZ128rmikz, 0 },
{ X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
{ X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
{ X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
@@ -3427,6 +3750,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
{ X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 },
{ X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 },
+ { X86::VPBROADCASTBZ128rk, X86::VPBROADCASTBZ128mk, TB_NO_REVERSE },
+ { X86::VPBROADCASTDZ128rk, X86::VPBROADCASTDZ128mk, TB_NO_REVERSE },
+ { X86::VPBROADCASTQZ128rk, X86::VPBROADCASTQZ128mk, TB_NO_REVERSE },
+ { X86::VPBROADCASTWZ128rk, X86::VPBROADCASTWZ128mk, TB_NO_REVERSE },
{ X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
{ X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
{ X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
@@ -3450,12 +3777,27 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 },
{ X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 },
{ X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 },
+ { X86::VPMULHRSWZ128rrkz, X86::VPMULHRSWZ128rmkz, 0 },
+ { X86::VPMULHUWZ128rrkz, X86::VPMULHUWZ128rmkz, 0 },
+ { X86::VPMULHWZ128rrkz, X86::VPMULHWZ128rmkz, 0 },
{ X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 },
{ X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 },
{ X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 },
+ { X86::VPMULTISHIFTQBZ128rrkz,X86::VPMULTISHIFTQBZ128rmkz,0 },
{ X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 },
{ X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
{ X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
+ { X86::VPROLVDZ128rrkz, X86::VPROLVDZ128rmkz, 0 },
+ { X86::VPROLVQZ128rrkz, X86::VPROLVQZ128rmkz, 0 },
+ { X86::VPRORVDZ128rrkz, X86::VPRORVDZ128rmkz, 0 },
+ { X86::VPRORVQZ128rrkz, X86::VPRORVQZ128rmkz, 0 },
+ { X86::VPSHLDDZ128rrikz, X86::VPSHLDDZ128rmikz, 0 },
+ { X86::VPSHLDQZ128rrikz, X86::VPSHLDQZ128rmikz, 0 },
+ { X86::VPSHLDWZ128rrikz, X86::VPSHLDWZ128rmikz, 0 },
+ { X86::VPSHRDDZ128rrikz, X86::VPSHRDDZ128rmikz, 0 },
+ { X86::VPSHRDQZ128rrikz, X86::VPSHRDQZ128rmikz, 0 },
+ { X86::VPSHRDWZ128rrikz, X86::VPSHRDWZ128rmikz, 0 },
+ { X86::VPSHUFBITQMBZ128rrk, X86::VPSHUFBITQMBZ128rmk, 0 },
{ X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
{ X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
{ X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
@@ -3493,6 +3835,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
{ X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
{ X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
+ { X86::VRANGEPDZ128rrikz, X86::VRANGEPDZ128rmikz, 0 },
+ { X86::VRANGEPSZ128rrikz, X86::VRANGEPSZ128rmikz, 0 },
+ { X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmkz, 0 },
+ { X86::VSCALEFPSZ128rrkz, X86::VSCALEFPSZ128rmkz, 0 },
{ X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 },
{ X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 },
{ X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
@@ -3504,19 +3850,80 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 },
{ X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
+ // GFNI instructions with zero masking.
+ { X86::VGF2P8AFFINEINVQBZ128rrikz, X86::VGF2P8AFFINEINVQBZ128rmikz, 0 },
+ { X86::VGF2P8AFFINEINVQBZ256rrikz, X86::VGF2P8AFFINEINVQBZ256rmikz, 0 },
+ { X86::VGF2P8AFFINEINVQBZrrikz, X86::VGF2P8AFFINEINVQBZrmikz, 0 },
+ { X86::VGF2P8AFFINEQBZ128rrikz, X86::VGF2P8AFFINEQBZ128rmikz, 0 },
+ { X86::VGF2P8AFFINEQBZ256rrikz, X86::VGF2P8AFFINEQBZ256rmikz, 0 },
+ { X86::VGF2P8AFFINEQBZrrikz, X86::VGF2P8AFFINEQBZrmikz, 0 },
+ { X86::VGF2P8MULBZ128rrkz, X86::VGF2P8MULBZ128rmkz, 0 },
+ { X86::VGF2P8MULBZ256rrkz, X86::VGF2P8MULBZ256rmkz, 0 },
+ { X86::VGF2P8MULBZrrkz, X86::VGF2P8MULBZrmkz, 0 },
+
// AVX-512 masked foldable instructions
+ { X86::VBLENDMPDZrrk, X86::VBLENDMPDZrmk, 0 },
+ { X86::VBLENDMPSZrrk, X86::VBLENDMPSZrmk, 0 },
+ { X86::VBROADCASTF32X2Zrk, X86::VBROADCASTF32X2Zmk, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Zrk, X86::VBROADCASTI32X2Zmk, TB_NO_REVERSE },
{ X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE },
{ X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmk, 0 },
+ { X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmk, 0 },
+ { X86::VCVTPD2DQZrrk, X86::VCVTPD2DQZrmk, 0 },
+ { X86::VCVTPD2PSZrrk, X86::VCVTPD2PSZrmk, 0 },
+ { X86::VCVTPD2QQZrrk, X86::VCVTPD2QQZrmk, 0 },
+ { X86::VCVTPD2UDQZrrk, X86::VCVTPD2UDQZrmk, 0 },
+ { X86::VCVTPD2UQQZrrk, X86::VCVTPD2UQQZrmk, 0 },
+ { X86::VCVTPH2PSZrrk, X86::VCVTPH2PSZrmk, 0 },
+ { X86::VCVTPS2DQZrrk, X86::VCVTPS2DQZrmk, 0 },
+ { X86::VCVTPS2PDZrrk, X86::VCVTPS2PDZrmk, 0 },
+ { X86::VCVTPS2QQZrrk, X86::VCVTPS2QQZrmk, 0 },
+ { X86::VCVTPS2UDQZrrk, X86::VCVTPS2UDQZrmk, 0 },
+ { X86::VCVTPS2UQQZrrk, X86::VCVTPS2UQQZrmk, 0 },
+ { X86::VCVTQQ2PDZrrk, X86::VCVTQQ2PDZrmk, 0 },
+ { X86::VCVTQQ2PSZrrk, X86::VCVTQQ2PSZrmk, 0 },
+ { X86::VCVTTPD2DQZrrk, X86::VCVTTPD2DQZrmk, 0 },
+ { X86::VCVTTPD2QQZrrk, X86::VCVTTPD2QQZrmk, 0 },
+ { X86::VCVTTPD2UDQZrrk, X86::VCVTTPD2UDQZrmk, 0 },
+ { X86::VCVTTPD2UQQZrrk, X86::VCVTTPD2UQQZrmk, 0 },
+ { X86::VCVTTPS2DQZrrk, X86::VCVTTPS2DQZrmk, 0 },
+ { X86::VCVTTPS2QQZrrk, X86::VCVTTPS2QQZrmk, 0 },
+ { X86::VCVTTPS2UDQZrrk, X86::VCVTTPS2UDQZrmk, 0 },
+ { X86::VCVTTPS2UQQZrrk, X86::VCVTTPS2UQQZrmk, 0 },
+ { X86::VCVTUDQ2PDZrrk, X86::VCVTUDQ2PDZrmk, 0 },
+ { X86::VCVTUDQ2PSZrrk, X86::VCVTUDQ2PSZrmk, 0 },
+ { X86::VCVTUQQ2PDZrrk, X86::VCVTUQQ2PDZrmk, 0 },
+ { X86::VCVTUQQ2PSZrrk, X86::VCVTUQQ2PSZrmk, 0 },
+ { X86::VEXP2PDZrk, X86::VEXP2PDZmk, 0 },
+ { X86::VEXP2PSZrk, X86::VEXP2PSZmk, 0 },
+ { X86::VEXPANDPDZrrk, X86::VEXPANDPDZrmk, TB_NO_REVERSE },
+ { X86::VEXPANDPSZrrk, X86::VEXPANDPSZrmk, TB_NO_REVERSE },
+ { X86::VGETEXPPDZrk, X86::VGETEXPPDZmk, 0 },
+ { X86::VGETEXPPSZrk, X86::VGETEXPPSZmk, 0 },
+ { X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmik, 0 },
+ { X86::VGETMANTPSZrrik, X86::VGETMANTPSZrmik, 0 },
+ { X86::VMOVDDUPZrrk, X86::VMOVDDUPZrmk, 0 },
+ { X86::VMOVSHDUPZrrk, X86::VMOVSHDUPZrmk, 0 },
+ { X86::VMOVSLDUPZrrk, X86::VMOVSLDUPZrmk, 0 },
{ X86::VPABSBZrrk, X86::VPABSBZrmk, 0 },
{ X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
{ X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
{ X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
+ { X86::VPBLENDMBZrrk, X86::VPBLENDMBZrmk, 0 },
+ { X86::VPBLENDMDZrrk, X86::VPBLENDMDZrmk, 0 },
+ { X86::VPBLENDMQZrrk, X86::VPBLENDMQZrmk, 0 },
+ { X86::VPBLENDMWZrrk, X86::VPBLENDMWZrmk, 0 },
{ X86::VPCONFLICTDZrrk, X86::VPCONFLICTDZrmk, 0 },
{ X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 },
{ X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
{ X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
{ X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
{ X86::VPERMQZrik, X86::VPERMQZmik, 0 },
+ { X86::VPEXPANDBZrrk, X86::VPEXPANDBZrmk, TB_NO_REVERSE },
+ { X86::VPEXPANDDZrrk, X86::VPEXPANDDZrmk, TB_NO_REVERSE },
+ { X86::VPEXPANDQZrrk, X86::VPEXPANDQZrmk, TB_NO_REVERSE },
+ { X86::VPEXPANDWZrrk, X86::VPEXPANDWZrmk, TB_NO_REVERSE },
{ X86::VPLZCNTDZrrk, X86::VPLZCNTDZrmk, 0 },
{ X86::VPLZCNTQZrrk, X86::VPLZCNTQZrmk, 0 },
{ X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
@@ -3535,6 +3942,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 },
{ X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 },
{ X86::VPOPCNTWZrrk, X86::VPOPCNTWZrmk, 0 },
+ { X86::VPROLDZrik, X86::VPROLDZmik, 0 },
+ { X86::VPROLQZrik, X86::VPROLQZmik, 0 },
+ { X86::VPRORDZrik, X86::VPRORDZmik, 0 },
+ { X86::VPRORQZrik, X86::VPRORQZmik, 0 },
{ X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
{ X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
{ X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
@@ -3547,20 +3958,82 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPSRLDZrik, X86::VPSRLDZmik, 0 },
{ X86::VPSRLQZrik, X86::VPSRLQZmik, 0 },
{ X86::VPSRLWZrik, X86::VPSRLWZmik, 0 },
+ { X86::VRCP14PDZrk, X86::VRCP14PDZmk, 0 },
+ { X86::VRCP14PSZrk, X86::VRCP14PSZmk, 0 },
+ { X86::VRCP28PDZrk, X86::VRCP28PDZmk, 0 },
+ { X86::VRCP28PSZrk, X86::VRCP28PSZmk, 0 },
+ { X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmik, 0 },
+ { X86::VREDUCEPSZrrik, X86::VREDUCEPSZrmik, 0 },
+ { X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmik, 0 },
+ { X86::VRNDSCALEPSZrrik, X86::VRNDSCALEPSZrmik, 0 },
+ { X86::VRSQRT14PDZrk, X86::VRSQRT14PDZmk, 0 },
+ { X86::VRSQRT14PSZrk, X86::VRSQRT14PSZmk, 0 },
+ { X86::VRSQRT28PDZrk, X86::VRSQRT28PDZmk, 0 },
+ { X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmk, 0 },
+ { X86::VSQRTPDZrk, X86::VSQRTPDZmk, 0 },
+ { X86::VSQRTPSZrk, X86::VSQRTPSZmk, 0 },
// AVX-512VL 256-bit masked foldable instructions
+ { X86::VBLENDMPDZ256rrk, X86::VBLENDMPDZ256rmk, 0 },
+ { X86::VBLENDMPSZ256rrk, X86::VBLENDMPSZ256rmk, 0 },
+ { X86::VBROADCASTF32X2Z256rk, X86::VBROADCASTF32X2Z256mk, TB_NO_REVERSE },
+ { X86::VBROADCASTI32X2Z256rk, X86::VBROADCASTI32X2Z256mk, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmk, 0 },
+ { X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmk, 0 },
+ { X86::VCVTPD2DQZ256rrk, X86::VCVTPD2DQZ256rmk, 0 },
+ { X86::VCVTPD2PSZ256rrk, X86::VCVTPD2PSZ256rmk, 0 },
+ { X86::VCVTPD2QQZ256rrk, X86::VCVTPD2QQZ256rmk, 0 },
+ { X86::VCVTPD2UDQZ256rrk, X86::VCVTPD2UDQZ256rmk, 0 },
+ { X86::VCVTPD2UQQZ256rrk, X86::VCVTPD2UQQZ256rmk, 0 },
+ { X86::VCVTPH2PSZ256rrk, X86::VCVTPH2PSZ256rmk, 0 },
+ { X86::VCVTPS2DQZ256rrk, X86::VCVTPS2DQZ256rmk, 0 },
+ { X86::VCVTPS2PDZ256rrk, X86::VCVTPS2PDZ256rmk, 0 },
+ { X86::VCVTPS2QQZ256rrk, X86::VCVTPS2QQZ256rmk, 0 },
+ { X86::VCVTPS2UDQZ256rrk, X86::VCVTPS2UDQZ256rmk, 0 },
+ { X86::VCVTPS2UQQZ256rrk, X86::VCVTPS2UQQZ256rmk, 0 },
+ { X86::VCVTQQ2PDZ256rrk, X86::VCVTQQ2PDZ256rmk, 0 },
+ { X86::VCVTQQ2PSZ256rrk, X86::VCVTQQ2PSZ256rmk, 0 },
+ { X86::VCVTTPD2DQZ256rrk, X86::VCVTTPD2DQZ256rmk, 0 },
+ { X86::VCVTTPD2QQZ256rrk, X86::VCVTTPD2QQZ256rmk, 0 },
+ { X86::VCVTTPD2UDQZ256rrk, X86::VCVTTPD2UDQZ256rmk, 0 },
+ { X86::VCVTTPD2UQQZ256rrk, X86::VCVTTPD2UQQZ256rmk, 0 },
+ { X86::VCVTTPS2DQZ256rrk, X86::VCVTTPS2DQZ256rmk, 0 },
+ { X86::VCVTTPS2QQZ256rrk, X86::VCVTTPS2QQZ256rmk, 0 },
+ { X86::VCVTTPS2UDQZ256rrk, X86::VCVTTPS2UDQZ256rmk, 0 },
+ { X86::VCVTTPS2UQQZ256rrk, X86::VCVTTPS2UQQZ256rmk, 0 },
+ { X86::VCVTUDQ2PDZ256rrk, X86::VCVTUDQ2PDZ256rmk, 0 },
+ { X86::VCVTUDQ2PSZ256rrk, X86::VCVTUDQ2PSZ256rmk, 0 },
+ { X86::VCVTUQQ2PDZ256rrk, X86::VCVTUQQ2PDZ256rmk, 0 },
+ { X86::VCVTUQQ2PSZ256rrk, X86::VCVTUQQ2PSZ256rmk, 0 },
+ { X86::VEXPANDPDZ256rrk, X86::VEXPANDPDZ256rmk, TB_NO_REVERSE },
+ { X86::VEXPANDPSZ256rrk, X86::VEXPANDPSZ256rmk, TB_NO_REVERSE },
+ { X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mk, 0 },
+ { X86::VGETEXPPSZ256rk, X86::VGETEXPPSZ256mk, 0 },
+ { X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmik, 0 },
+ { X86::VGETMANTPSZ256rrik, X86::VGETMANTPSZ256rmik, 0 },
+ { X86::VMOVDDUPZ256rrk, X86::VMOVDDUPZ256rmk, 0 },
+ { X86::VMOVSHDUPZ256rrk, X86::VMOVSHDUPZ256rmk, 0 },
+ { X86::VMOVSLDUPZ256rrk, X86::VMOVSLDUPZ256rmk, 0 },
{ X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 },
{ X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
{ X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
{ X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
+ { X86::VPBLENDMBZ256rrk, X86::VPBLENDMBZ256rmk, 0 },
+ { X86::VPBLENDMDZ256rrk, X86::VPBLENDMDZ256rmk, 0 },
+ { X86::VPBLENDMQZ256rrk, X86::VPBLENDMQZ256rmk, 0 },
+ { X86::VPBLENDMWZ256rrk, X86::VPBLENDMWZ256rmk, 0 },
{ X86::VPCONFLICTDZ256rrk, X86::VPCONFLICTDZ256rmk, 0 },
{ X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 },
{ X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
{ X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
{ X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
{ X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 },
+ { X86::VPEXPANDBZ256rrk, X86::VPEXPANDBZ256rmk, TB_NO_REVERSE },
+ { X86::VPEXPANDDZ256rrk, X86::VPEXPANDDZ256rmk, TB_NO_REVERSE },
+ { X86::VPEXPANDQZ256rrk, X86::VPEXPANDQZ256rmk, TB_NO_REVERSE },
+ { X86::VPEXPANDWZ256rrk, X86::VPEXPANDWZ256rmk, TB_NO_REVERSE },
{ X86::VPLZCNTDZ256rrk, X86::VPLZCNTDZ256rmk, 0 },
{ X86::VPLZCNTQZ256rrk, X86::VPLZCNTQZ256rmk, 0 },
{ X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE },
@@ -3579,6 +4052,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPOPCNTDZ256rrk, X86::VPOPCNTDZ256rmk, 0 },
{ X86::VPOPCNTQZ256rrk, X86::VPOPCNTQZ256rmk, 0 },
{ X86::VPOPCNTWZ256rrk, X86::VPOPCNTWZ256rmk, 0 },
+ { X86::VPROLDZ256rik, X86::VPROLDZ256mik, 0 },
+ { X86::VPROLQZ256rik, X86::VPROLQZ256mik, 0 },
+ { X86::VPRORDZ256rik, X86::VPRORDZ256mik, 0 },
+ { X86::VPRORQZ256rik, X86::VPRORQZ256mik, 0 },
{ X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
{ X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
{ X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
@@ -3591,17 +4068,74 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 },
{ X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 },
{ X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 },
+ { X86::VRCP14PDZ256rk, X86::VRCP14PDZ256mk, 0 },
+ { X86::VRCP14PSZ256rk, X86::VRCP14PSZ256mk, 0 },
+ { X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmik, 0 },
+ { X86::VREDUCEPSZ256rrik, X86::VREDUCEPSZ256rmik, 0 },
+ { X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmik, 0 },
+ { X86::VRNDSCALEPSZ256rrik, X86::VRNDSCALEPSZ256rmik, 0 },
+ { X86::VRSQRT14PDZ256rk, X86::VRSQRT14PDZ256mk, 0 },
+ { X86::VRSQRT14PSZ256rk, X86::VRSQRT14PSZ256mk, 0 },
+ { X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mk, 0 },
+ { X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mk, 0 },
// AVX-512VL 128-bit masked foldable instructions
+ { X86::VBLENDMPDZ128rrk, X86::VBLENDMPDZ128rmk, 0 },
+ { X86::VBLENDMPSZ128rrk, X86::VBLENDMPSZ128rmk, 0 },
+ { X86::VBROADCASTI32X2Z128rk, X86::VBROADCASTI32X2Z128mk, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmk, 0 },
+ { X86::VCVTPD2DQZ128rrk, X86::VCVTPD2DQZ128rmk, 0 },
+ { X86::VCVTPD2PSZ128rrk, X86::VCVTPD2PSZ128rmk, 0 },
+ { X86::VCVTPD2QQZ128rrk, X86::VCVTPD2QQZ128rmk, 0 },
+ { X86::VCVTPD2UDQZ128rrk, X86::VCVTPD2UDQZ128rmk, 0 },
+ { X86::VCVTPD2UQQZ128rrk, X86::VCVTPD2UQQZ128rmk, 0 },
+ { X86::VCVTPH2PSZ128rrk, X86::VCVTPH2PSZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTPS2DQZ128rrk, X86::VCVTPS2DQZ128rmk, 0 },
+ { X86::VCVTPS2PDZ128rrk, X86::VCVTPS2PDZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTPS2QQZ128rrk, X86::VCVTPS2QQZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTPS2UDQZ128rrk, X86::VCVTPS2UDQZ128rmk, 0 },
+ { X86::VCVTPS2UQQZ128rrk, X86::VCVTPS2UQQZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTQQ2PDZ128rrk, X86::VCVTQQ2PDZ128rmk, 0 },
+ { X86::VCVTQQ2PSZ128rrk, X86::VCVTQQ2PSZ128rmk, 0 },
+ { X86::VCVTTPD2DQZ128rrk, X86::VCVTTPD2DQZ128rmk, 0 },
+ { X86::VCVTTPD2QQZ128rrk, X86::VCVTTPD2QQZ128rmk, 0 },
+ { X86::VCVTTPD2UDQZ128rrk, X86::VCVTTPD2UDQZ128rmk, 0 },
+ { X86::VCVTTPD2UQQZ128rrk, X86::VCVTTPD2UQQZ128rmk, 0 },
+ { X86::VCVTTPS2DQZ128rrk, X86::VCVTTPS2DQZ128rmk, 0 },
+ { X86::VCVTTPS2QQZ128rrk, X86::VCVTTPS2QQZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTTPS2UDQZ128rrk, X86::VCVTTPS2UDQZ128rmk, 0 },
+ { X86::VCVTTPS2UQQZ128rrk, X86::VCVTTPS2UQQZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTUDQ2PDZ128rrk, X86::VCVTUDQ2PDZ128rmk, TB_NO_REVERSE },
+ { X86::VCVTUDQ2PSZ128rrk, X86::VCVTUDQ2PSZ128rmk, 0 },
+ { X86::VCVTUQQ2PDZ128rrk, X86::VCVTUQQ2PDZ128rmk, 0 },
+ { X86::VCVTUQQ2PSZ128rrk, X86::VCVTUQQ2PSZ128rmk, 0 },
+ { X86::VEXPANDPDZ128rrk, X86::VEXPANDPDZ128rmk, TB_NO_REVERSE },
+ { X86::VEXPANDPSZ128rrk, X86::VEXPANDPSZ128rmk, TB_NO_REVERSE },
+ { X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mk, 0 },
+ { X86::VGETEXPPSZ128rk, X86::VGETEXPPSZ128mk, 0 },
+ { X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmik, 0 },
+ { X86::VGETMANTPSZ128rrik, X86::VGETMANTPSZ128rmik, 0 },
+ { X86::VMOVDDUPZ128rrk, X86::VMOVDDUPZ128rmk, TB_NO_REVERSE },
+ { X86::VMOVSHDUPZ128rrk, X86::VMOVSHDUPZ128rmk, 0 },
+ { X86::VMOVSLDUPZ128rrk, X86::VMOVSLDUPZ128rmk, 0 },
{ X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 },
{ X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
{ X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
{ X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
+ { X86::VPBLENDMBZ128rrk, X86::VPBLENDMBZ128rmk, 0 },
+ { X86::VPBLENDMDZ128rrk, X86::VPBLENDMDZ128rmk, 0 },
+ { X86::VPBLENDMQZ128rrk, X86::VPBLENDMQZ128rmk, 0 },
+ { X86::VPBLENDMWZ128rrk, X86::VPBLENDMWZ128rmk, 0 },
{ X86::VPCONFLICTDZ128rrk, X86::VPCONFLICTDZ128rmk, 0 },
{ X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 },
{ X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
{ X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
+ { X86::VPEXPANDBZ128rrk, X86::VPEXPANDBZ128rmk, TB_NO_REVERSE },
+ { X86::VPEXPANDDZ128rrk, X86::VPEXPANDDZ128rmk, TB_NO_REVERSE },
+ { X86::VPEXPANDQZ128rrk, X86::VPEXPANDQZ128rmk, TB_NO_REVERSE },
+ { X86::VPEXPANDWZ128rrk, X86::VPEXPANDWZ128rmk, TB_NO_REVERSE },
{ X86::VPLZCNTDZ128rrk, X86::VPLZCNTDZ128rmk, 0 },
{ X86::VPLZCNTQZ128rrk, X86::VPLZCNTQZ128rmk, 0 },
{ X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
@@ -3620,6 +4154,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPOPCNTDZ128rrk, X86::VPOPCNTDZ128rmk, 0 },
{ X86::VPOPCNTQZ128rrk, X86::VPOPCNTQZ128rmk, 0 },
{ X86::VPOPCNTWZ128rrk, X86::VPOPCNTWZ128rmk, 0 },
+ { X86::VPROLDZ128rik, X86::VPROLDZ128mik, 0 },
+ { X86::VPROLQZ128rik, X86::VPROLQZ128mik, 0 },
+ { X86::VPRORDZ128rik, X86::VPRORDZ128mik, 0 },
+ { X86::VPRORQZ128rik, X86::VPRORQZ128mik, 0 },
{ X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
{ X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
{ X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
@@ -3632,6 +4170,49 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
{ X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
{ X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
+ { X86::VRCP14PDZ128rk, X86::VRCP14PDZ128mk, 0 },
+ { X86::VRCP14PSZ128rk, X86::VRCP14PSZ128mk, 0 },
+ { X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmik, 0 },
+ { X86::VREDUCEPSZ128rrik, X86::VREDUCEPSZ128rmik, 0 },
+ { X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmik, 0 },
+ { X86::VRNDSCALEPSZ128rrik, X86::VRNDSCALEPSZ128rmik, 0 },
+ { X86::VRSQRT14PDZ128rk, X86::VRSQRT14PDZ128mk, 0 },
+ { X86::VRSQRT14PSZ128rk, X86::VRSQRT14PSZ128mk, 0 },
+ { X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mk, 0 },
+ { X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mk, 0 },
+
+ // AVX512 masked move/load instructions. TB_NO_REVERSE keeps these from being
+ // unfolded, since unfolding would create an unmasked load.
+ { X86::VMOVAPDZ128rrk, X86::VMOVAPDZ128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVAPDZ256rrk, X86::VMOVAPDZ256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVAPDZrrk, X86::VMOVAPDZrmk, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVAPSZ128rrk, X86::VMOVAPSZ128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVAPSZ256rrk, X86::VMOVAPSZ256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVAPSZrrk, X86::VMOVAPSZrmk, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVDQA32Z128rrk, X86::VMOVDQA32Z128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVDQA32Z256rrk, X86::VMOVDQA32Z256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVDQA32Zrrk, X86::VMOVDQA32Zrmk, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVDQA64Z128rrk, X86::VMOVDQA64Z128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
+ { X86::VMOVDQA64Z256rrk, X86::VMOVDQA64Z256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
+ { X86::VMOVDQA64Zrrk, X86::VMOVDQA64Zrmk, TB_NO_REVERSE | TB_ALIGN_64 },
+ { X86::VMOVDQU16Z128rrk, X86::VMOVDQU16Z128rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU16Z256rrk, X86::VMOVDQU16Z256rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU16Zrrk, X86::VMOVDQU16Zrmk, TB_NO_REVERSE },
+ { X86::VMOVDQU32Z128rrk, X86::VMOVDQU32Z128rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU32Z256rrk, X86::VMOVDQU32Z256rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU32Zrrk, X86::VMOVDQU32Zrmk, TB_NO_REVERSE },
+ { X86::VMOVDQU64Z128rrk, X86::VMOVDQU64Z128rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU64Z256rrk, X86::VMOVDQU64Z256rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU64Zrrk, X86::VMOVDQU64Zrmk, TB_NO_REVERSE },
+ { X86::VMOVDQU8Z128rrk, X86::VMOVDQU8Z128rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU8Z256rrk, X86::VMOVDQU8Z256rmk, TB_NO_REVERSE },
+ { X86::VMOVDQU8Zrrk, X86::VMOVDQU8Zrmk, TB_NO_REVERSE },
+ { X86::VMOVUPDZ128rrk, X86::VMOVUPDZ128rmk, TB_NO_REVERSE },
+ { X86::VMOVUPDZ256rrk, X86::VMOVUPDZ256rmk, TB_NO_REVERSE },
+ { X86::VMOVUPDZrrk, X86::VMOVUPDZrmk, TB_NO_REVERSE },
+ { X86::VMOVUPSZ128rrk, X86::VMOVUPSZ128rmk, TB_NO_REVERSE },
+ { X86::VMOVUPSZ256rrk, X86::VMOVUPSZ256rmk, TB_NO_REVERSE },
+ { X86::VMOVUPSZrrk, X86::VMOVUPSZrmk, TB_NO_REVERSE },
// AVX-512 masked compare instructions
{ X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 },
@@ -3690,6 +4271,30 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPCMPWZ128rrik, X86::VPCMPWZ128rmik, 0 },
{ X86::VPCMPWZ256rrik, X86::VPCMPWZ256rmik, 0 },
{ X86::VPCMPWZrrik, X86::VPCMPWZrmik, 0 },
+ { X86::VPTESTMBZ128rrk, X86::VPTESTMBZ128rmk, 0 },
+ { X86::VPTESTMBZ256rrk, X86::VPTESTMBZ256rmk, 0 },
+ { X86::VPTESTMBZrrk, X86::VPTESTMBZrmk, 0 },
+ { X86::VPTESTMDZ128rrk, X86::VPTESTMDZ128rmk, 0 },
+ { X86::VPTESTMDZ256rrk, X86::VPTESTMDZ256rmk, 0 },
+ { X86::VPTESTMDZrrk, X86::VPTESTMDZrmk, 0 },
+ { X86::VPTESTMQZ128rrk, X86::VPTESTMQZ128rmk, 0 },
+ { X86::VPTESTMQZ256rrk, X86::VPTESTMQZ256rmk, 0 },
+ { X86::VPTESTMQZrrk, X86::VPTESTMQZrmk, 0 },
+ { X86::VPTESTMWZ128rrk, X86::VPTESTMWZ128rmk, 0 },
+ { X86::VPTESTMWZ256rrk, X86::VPTESTMWZ256rmk, 0 },
+ { X86::VPTESTMWZrrk, X86::VPTESTMWZrmk, 0 },
+ { X86::VPTESTNMBZ128rrk, X86::VPTESTNMBZ128rmk, 0 },
+ { X86::VPTESTNMBZ256rrk, X86::VPTESTNMBZ256rmk, 0 },
+ { X86::VPTESTNMBZrrk, X86::VPTESTNMBZrmk, 0 },
+ { X86::VPTESTNMDZ128rrk, X86::VPTESTNMDZ128rmk, 0 },
+ { X86::VPTESTNMDZ256rrk, X86::VPTESTNMDZ256rmk, 0 },
+ { X86::VPTESTNMDZrrk, X86::VPTESTNMDZrmk, 0 },
+ { X86::VPTESTNMQZ128rrk, X86::VPTESTNMQZ128rmk, 0 },
+ { X86::VPTESTNMQZ256rrk, X86::VPTESTNMQZ256rmk, 0 },
+ { X86::VPTESTNMQZrrk, X86::VPTESTNMQZrmk, 0 },
+ { X86::VPTESTNMWZ128rrk, X86::VPTESTNMWZ128rmk, 0 },
+ { X86::VPTESTNMWZ256rrk, X86::VPTESTNMWZ256rmk, 0 },
+ { X86::VPTESTNMWZrrk, X86::VPTESTNMWZrmk, 0 },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
@@ -3727,10 +4332,21 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 },
{ X86::VANDPDZrrk, X86::VANDPDZrmk, 0 },
{ X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
+ { X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE },
+ { X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
{ X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
+ { X86::VFIXUPIMMPDZrrik, X86::VFIXUPIMMPDZrmik, 0 },
+ { X86::VFIXUPIMMPSZrrik, X86::VFIXUPIMMPSZrmik, 0 },
+ { X86::VFIXUPIMMSDZrrik, X86::VFIXUPIMMSDZrmik, TB_NO_REVERSE },
+ { X86::VFIXUPIMMSSZrrik, X86::VFIXUPIMMSSZrmik, TB_NO_REVERSE },
+ { X86::VGETEXPSDZrk, X86::VGETEXPSDZmk, TB_NO_REVERSE },
+ { X86::VGETEXPSSZrk, X86::VGETEXPSSZmk, TB_NO_REVERSE },
+ { X86::VGETMANTSDZrrik, X86::VGETMANTSDZrmik, TB_NO_REVERSE },
+ { X86::VGETMANTSSZrrik, X86::VGETMANTSSZrmik, TB_NO_REVERSE },
{ X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
{ X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
{ X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
@@ -3776,6 +4392,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
{ X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 },
{ X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 },
+ { X86::VPDPBUSDSZrk, X86::VPDPBUSDSZmk, 0 },
+ { X86::VPDPBUSDZrk, X86::VPDPBUSDZmk, 0 },
+ { X86::VPDPWSSDSZrk, X86::VPDPWSSDSZmk, 0 },
+ { X86::VPDPWSSDZrk, X86::VPDPWSSDZmk, 0 },
{ X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
{ X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
{ X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
@@ -3817,12 +4437,32 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 },
{ X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 },
{ X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 },
+ { X86::VPMULHRSWZrrk, X86::VPMULHRSWZrmk, 0 },
+ { X86::VPMULHUWZrrk, X86::VPMULHUWZrmk, 0 },
+ { X86::VPMULHWZrrk, X86::VPMULHWZrmk, 0 },
{ X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 },
{ X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 },
{ X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 },
+ { X86::VPMULTISHIFTQBZrrk, X86::VPMULTISHIFTQBZrmk, 0 },
{ X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 },
{ X86::VPORDZrrk, X86::VPORDZrmk, 0 },
{ X86::VPORQZrrk, X86::VPORQZrmk, 0 },
+ { X86::VPROLVDZrrk, X86::VPROLVDZrmk, 0 },
+ { X86::VPROLVQZrrk, X86::VPROLVQZrmk, 0 },
+ { X86::VPRORVDZrrk, X86::VPRORVDZrmk, 0 },
+ { X86::VPRORVQZrrk, X86::VPRORVQZrmk, 0 },
+ { X86::VPSHLDDZrrik, X86::VPSHLDDZrmik, 0 },
+ { X86::VPSHLDQZrrik, X86::VPSHLDQZrmik, 0 },
+ { X86::VPSHLDVDZrk, X86::VPSHLDVDZmk, 0 },
+ { X86::VPSHLDVQZrk, X86::VPSHLDVQZmk, 0 },
+ { X86::VPSHLDVWZrk, X86::VPSHLDVWZmk, 0 },
+ { X86::VPSHLDWZrrik, X86::VPSHLDWZrmik, 0 },
+ { X86::VPSHRDDZrrik, X86::VPSHRDDZrmik, 0 },
+ { X86::VPSHRDQZrrik, X86::VPSHRDQZrmik, 0 },
+ { X86::VPSHRDVDZrk, X86::VPSHRDVDZmk, 0 },
+ { X86::VPSHRDVQZrk, X86::VPSHRDVQZmk, 0 },
+ { X86::VPSHRDVWZrk, X86::VPSHRDVWZmk, 0 },
+ { X86::VPSHRDWZrrik, X86::VPSHRDWZrmik, 0 },
{ X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
{ X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
{ X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
@@ -3862,12 +4502,34 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
{ X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
{ X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
+ { X86::VRANGEPDZrrik, X86::VRANGEPDZrmik, 0 },
+ { X86::VRANGEPSZrrik, X86::VRANGEPSZrmik, 0 },
+ { X86::VRANGESDZrrik, X86::VRANGESDZrmik, TB_NO_REVERSE },
+ { X86::VRANGESSZrrik, X86::VRANGESSZrmik, TB_NO_REVERSE },
+ { X86::VRCP14SDZrrk, X86::VRCP14SDZrmk, TB_NO_REVERSE },
+ { X86::VRCP14SSZrrk, X86::VRCP14SSZrmk, TB_NO_REVERSE },
+ { X86::VRCP28SDZrk, X86::VRCP28SDZmk, TB_NO_REVERSE },
+ { X86::VRCP28SSZrk, X86::VRCP28SSZmk, TB_NO_REVERSE },
+ { X86::VREDUCESDZrrik, X86::VREDUCESDZrmik, TB_NO_REVERSE },
+ { X86::VREDUCESSZrrik, X86::VREDUCESSZrmik, TB_NO_REVERSE },
+ { X86::VRNDSCALESDZr_Intk, X86::VRNDSCALESDZm_Intk, TB_NO_REVERSE },
+ { X86::VRNDSCALESSZr_Intk, X86::VRNDSCALESSZm_Intk, TB_NO_REVERSE },
+ { X86::VRSQRT14SDZrrk, X86::VRSQRT14SDZrmk, TB_NO_REVERSE },
+ { X86::VRSQRT14SSZrrk, X86::VRSQRT14SSZrmk, TB_NO_REVERSE },
+ { X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE },
+ { X86::VRSQRT28SSZrk, X86::VRSQRT28SSZmk, TB_NO_REVERSE },
+ { X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmk, 0 },
+ { X86::VSCALEFPSZrrk, X86::VSCALEFPSZrmk, 0 },
+ { X86::VSCALEFSDZrrk, X86::VSCALEFSDZrmk, TB_NO_REVERSE },
+ { X86::VSCALEFSSZrrk, X86::VSCALEFSSZrmk, TB_NO_REVERSE },
{ X86::VSHUFF32X4Zrrik, X86::VSHUFF32X4Zrmik, 0 },
{ X86::VSHUFF64X2Zrrik, X86::VSHUFF64X2Zrmik, 0 },
{ X86::VSHUFI32X4Zrrik, X86::VSHUFI32X4Zrmik, 0 },
{ X86::VSHUFI64X2Zrrik, X86::VSHUFI64X2Zrmik, 0 },
{ X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 },
{ X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
+ { X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE },
+ { X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE },
{ X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
{ X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
{ X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
@@ -3888,8 +4550,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 },
{ X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 },
{ X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
+ { X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0 },
{ X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
{ X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+ { X86::VFIXUPIMMPDZ256rrik,X86::VFIXUPIMMPDZ256rmik, 0 },
+ { X86::VFIXUPIMMPSZ256rrik,X86::VFIXUPIMMPSZ256rmik, 0 },
{ X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 },
{ X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 },
{ X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 },
@@ -3925,6 +4590,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
{ X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 },
{ X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 },
+ { X86::VPDPBUSDSZ256rk, X86::VPDPBUSDSZ256mk, 0 },
+ { X86::VPDPBUSDZ256rk, X86::VPDPBUSDZ256mk, 0 },
+ { X86::VPDPWSSDSZ256rk, X86::VPDPWSSDSZ256mk, 0 },
+ { X86::VPDPWSSDZ256rk, X86::VPDPWSSDZ256mk, 0 },
{ X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
{ X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
{ X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
@@ -3966,12 +4635,32 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 },
{ X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 },
{ X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 },
+ { X86::VPMULHRSWZ256rrk, X86::VPMULHRSWZ256rmk, 0 },
+ { X86::VPMULHUWZ256rrk, X86::VPMULHUWZ256rmk, 0 },
+ { X86::VPMULHWZ256rrk, X86::VPMULHWZ256rmk, 0 },
{ X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 },
{ X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 },
{ X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 },
+ { X86::VPMULTISHIFTQBZ256rrk, X86::VPMULTISHIFTQBZ256rmk, 0 },
{ X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 },
{ X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
{ X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
+ { X86::VPROLVDZ256rrk, X86::VPROLVDZ256rmk, 0 },
+ { X86::VPROLVQZ256rrk, X86::VPROLVQZ256rmk, 0 },
+ { X86::VPRORVDZ256rrk, X86::VPRORVDZ256rmk, 0 },
+ { X86::VPRORVQZ256rrk, X86::VPRORVQZ256rmk, 0 },
+ { X86::VPSHLDDZ256rrik, X86::VPSHLDDZ256rmik, 0 },
+ { X86::VPSHLDQZ256rrik, X86::VPSHLDQZ256rmik, 0 },
+ { X86::VPSHLDVDZ256rk, X86::VPSHLDVDZ256mk, 0 },
+ { X86::VPSHLDVQZ256rk, X86::VPSHLDVQZ256mk, 0 },
+ { X86::VPSHLDVWZ256rk, X86::VPSHLDVWZ256mk, 0 },
+ { X86::VPSHLDWZ256rrik, X86::VPSHLDWZ256rmik, 0 },
+ { X86::VPSHRDDZ256rrik, X86::VPSHRDDZ256rmik, 0 },
+ { X86::VPSHRDQZ256rrik, X86::VPSHRDQZ256rmik, 0 },
+ { X86::VPSHRDVDZ256rk, X86::VPSHRDVDZ256mk, 0 },
+ { X86::VPSHRDVQZ256rk, X86::VPSHRDVQZ256mk, 0 },
+ { X86::VPSHRDVWZ256rk, X86::VPSHRDVWZ256mk, 0 },
+ { X86::VPSHRDWZ256rrik, X86::VPSHRDWZ256rmik, 0 },
{ X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
{ X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
{ X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
@@ -4011,6 +4700,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
{ X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
{ X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
+ { X86::VRANGEPDZ256rrik, X86::VRANGEPDZ256rmik, 0 },
+ { X86::VRANGEPSZ256rrik, X86::VRANGEPSZ256rmik, 0 },
+ { X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmk, 0 },
+ { X86::VSCALEFPSZ256rrk, X86::VSCALEFPSZ256rmk, 0 },
{ X86::VSHUFF32X4Z256rrik, X86::VSHUFF32X4Z256rmik, 0 },
{ X86::VSHUFF64X2Z256rrik, X86::VSHUFF64X2Z256rmik, 0 },
{ X86::VSHUFI32X4Z256rrik, X86::VSHUFI32X4Z256rmik, 0 },
@@ -4035,8 +4728,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 },
{ X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 },
{ X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 },
+ { X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0 },
{ X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
{ X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
+ { X86::VFIXUPIMMPDZ128rrik,X86::VFIXUPIMMPDZ128rmik, 0 },
+ { X86::VFIXUPIMMPSZ128rrik,X86::VFIXUPIMMPSZ128rmik, 0 },
{ X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
{ X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
{ X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
@@ -4068,6 +4764,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
{ X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 },
{ X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 },
+ { X86::VPDPBUSDSZ128rk, X86::VPDPBUSDSZ128mk, 0 },
+ { X86::VPDPBUSDZ128rk, X86::VPDPBUSDZ128mk, 0 },
+ { X86::VPDPWSSDSZ128rk, X86::VPDPWSSDSZ128mk, 0 },
+ { X86::VPDPWSSDZ128rk, X86::VPDPWSSDZ128mk, 0 },
{ X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
{ X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
{ X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
@@ -4105,12 +4805,32 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 },
{ X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 },
{ X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 },
+ { X86::VPMULHRSWZ128rrk, X86::VPMULHRSWZ128rmk, 0 },
+ { X86::VPMULHUWZ128rrk, X86::VPMULHUWZ128rmk, 0 },
+ { X86::VPMULHWZ128rrk, X86::VPMULHWZ128rmk, 0 },
{ X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 },
{ X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 },
{ X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 },
+ { X86::VPMULTISHIFTQBZ128rrk, X86::VPMULTISHIFTQBZ128rmk, 0 },
{ X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 },
{ X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
{ X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
+ { X86::VPROLVDZ128rrk, X86::VPROLVDZ128rmk, 0 },
+ { X86::VPROLVQZ128rrk, X86::VPROLVQZ128rmk, 0 },
+ { X86::VPRORVDZ128rrk, X86::VPRORVDZ128rmk, 0 },
+ { X86::VPRORVQZ128rrk, X86::VPRORVQZ128rmk, 0 },
+ { X86::VPSHLDDZ128rrik, X86::VPSHLDDZ128rmik, 0 },
+ { X86::VPSHLDQZ128rrik, X86::VPSHLDQZ128rmik, 0 },
+ { X86::VPSHLDVDZ128rk, X86::VPSHLDVDZ128mk, 0 },
+ { X86::VPSHLDVQZ128rk, X86::VPSHLDVQZ128mk, 0 },
+ { X86::VPSHLDVWZ128rk, X86::VPSHLDVWZ128mk, 0 },
+ { X86::VPSHLDWZ128rrik, X86::VPSHLDWZ128rmik, 0 },
+ { X86::VPSHRDDZ128rrik, X86::VPSHRDDZ128rmik, 0 },
+ { X86::VPSHRDQZ128rrik, X86::VPSHRDQZ128rmik, 0 },
+ { X86::VPSHRDVDZ128rk, X86::VPSHRDVDZ128mk, 0 },
+ { X86::VPSHRDVQZ128rk, X86::VPSHRDVQZ128mk, 0 },
+ { X86::VPSHRDVWZ128rk, X86::VPSHRDVWZ128mk, 0 },
+ { X86::VPSHRDWZ128rrik, X86::VPSHRDWZ128rmik, 0 },
{ X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
{ X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
{ X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
@@ -4150,6 +4870,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
{ X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
{ X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
+ { X86::VRANGEPDZ128rrik, X86::VRANGEPDZ128rmik, 0 },
+ { X86::VRANGEPSZ128rrik, X86::VRANGEPSZ128rmik, 0 },
+ { X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmk, 0 },
+ { X86::VSCALEFPSZ128rrk, X86::VSCALEFPSZ128rmk, 0 },
{ X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 },
{ X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 },
{ X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
@@ -4161,7 +4885,26 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 },
{ X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 },
+ // GFNI masked instructions.
+ { X86::VGF2P8AFFINEINVQBZ128rrik, X86::VGF2P8AFFINEINVQBZ128rmik, 0 },
+ { X86::VGF2P8AFFINEINVQBZ256rrik, X86::VGF2P8AFFINEINVQBZ256rmik, 0 },
+ { X86::VGF2P8AFFINEINVQBZrrik, X86::VGF2P8AFFINEINVQBZrmik, 0 },
+ { X86::VGF2P8AFFINEQBZ128rrik, X86::VGF2P8AFFINEQBZ128rmik, 0 },
+ { X86::VGF2P8AFFINEQBZ256rrik, X86::VGF2P8AFFINEQBZ256rmik, 0 },
+ { X86::VGF2P8AFFINEQBZrrik, X86::VGF2P8AFFINEQBZrmik, 0 },
+ { X86::VGF2P8MULBZ128rrk, X86::VGF2P8MULBZ128rmk, 0 },
+ { X86::VGF2P8MULBZ256rrk, X86::VGF2P8MULBZ256rmk, 0 },
+ { X86::VGF2P8MULBZrrk, X86::VGF2P8MULBZrmk, 0 },
+
// 512-bit three source instructions with zero masking.
+ { X86::VFIXUPIMMPDZrrikz, X86::VFIXUPIMMPDZrmikz, 0 },
+ { X86::VFIXUPIMMPSZrrikz, X86::VFIXUPIMMPSZrmikz, 0 },
+ { X86::VFIXUPIMMSDZrrikz, X86::VFIXUPIMMSDZrmikz, TB_NO_REVERSE },
+ { X86::VFIXUPIMMSSZrrikz, X86::VFIXUPIMMSSZrmikz, TB_NO_REVERSE },
+ { X86::VPDPBUSDSZrkz, X86::VPDPBUSDSZmkz, 0 },
+ { X86::VPDPBUSDZrkz, X86::VPDPBUSDZmkz, 0 },
+ { X86::VPDPWSSDSZrkz, X86::VPDPWSSDSZmkz, 0 },
+ { X86::VPDPWSSDZrkz, X86::VPDPWSSDZmkz, 0 },
{ X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 },
{ X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 },
{ X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 },
@@ -4176,10 +4919,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
{ X86::VPMADD52HUQZrkz, X86::VPMADD52HUQZmkz, 0 },
{ X86::VPMADD52LUQZrkz, X86::VPMADD52LUQZmkz, 0 },
+ { X86::VPSHLDVDZrkz, X86::VPSHLDVDZmkz, 0 },
+ { X86::VPSHLDVQZrkz, X86::VPSHLDVQZmkz, 0 },
+ { X86::VPSHLDVWZrkz, X86::VPSHLDVWZmkz, 0 },
+ { X86::VPSHRDVDZrkz, X86::VPSHRDVDZmkz, 0 },
+ { X86::VPSHRDVQZrkz, X86::VPSHRDVQZmkz, 0 },
+ { X86::VPSHRDVWZrkz, X86::VPSHRDVWZmkz, 0 },
{ X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
{ X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
// 256-bit three source instructions with zero masking.
+ { X86::VFIXUPIMMPDZ256rrikz,X86::VFIXUPIMMPDZ256rmikz,0 },
+ { X86::VFIXUPIMMPSZ256rrikz,X86::VFIXUPIMMPSZ256rmikz,0 },
+ { X86::VPDPBUSDSZ256rkz, X86::VPDPBUSDSZ256mkz, 0 },
+ { X86::VPDPBUSDZ256rkz, X86::VPDPBUSDZ256mkz, 0 },
+ { X86::VPDPWSSDSZ256rkz, X86::VPDPWSSDSZ256mkz, 0 },
+ { X86::VPDPWSSDZ256rkz, X86::VPDPWSSDZ256mkz, 0 },
{ X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 },
{ X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 },
{ X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 },
@@ -4194,10 +4949,22 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
{ X86::VPMADD52HUQZ256rkz, X86::VPMADD52HUQZ256mkz, 0 },
{ X86::VPMADD52LUQZ256rkz, X86::VPMADD52LUQZ256mkz, 0 },
+ { X86::VPSHLDVDZ256rkz, X86::VPSHLDVDZ256mkz, 0 },
+ { X86::VPSHLDVQZ256rkz, X86::VPSHLDVQZ256mkz, 0 },
+ { X86::VPSHLDVWZ256rkz, X86::VPSHLDVWZ256mkz, 0 },
+ { X86::VPSHRDVDZ256rkz, X86::VPSHRDVDZ256mkz, 0 },
+ { X86::VPSHRDVQZ256rkz, X86::VPSHRDVQZ256mkz, 0 },
+ { X86::VPSHRDVWZ256rkz, X86::VPSHRDVWZ256mkz, 0 },
{ X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
{ X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
// 128-bit three source instructions with zero masking.
+ { X86::VFIXUPIMMPDZ128rrikz,X86::VFIXUPIMMPDZ128rmikz,0 },
+ { X86::VFIXUPIMMPSZ128rrikz,X86::VFIXUPIMMPSZ128rmikz,0 },
+ { X86::VPDPBUSDSZ128rkz, X86::VPDPBUSDSZ128mkz, 0 },
+ { X86::VPDPBUSDZ128rkz, X86::VPDPBUSDZ128mkz, 0 },
+ { X86::VPDPWSSDSZ128rkz, X86::VPDPWSSDSZ128mkz, 0 },
+ { X86::VPDPWSSDZ128rkz, X86::VPDPWSSDZ128mkz, 0 },
{ X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 },
{ X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 },
{ X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 },
@@ -4212,6 +4979,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
{ X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
{ X86::VPMADD52HUQZ128rkz, X86::VPMADD52HUQZ128mkz, 0 },
{ X86::VPMADD52LUQZ128rkz, X86::VPMADD52LUQZ128mkz, 0 },
+ { X86::VPSHLDVDZ128rkz, X86::VPSHLDVDZ128mkz, 0 },
+ { X86::VPSHLDVQZ128rkz, X86::VPSHLDVQZ128mkz, 0 },
+ { X86::VPSHLDVWZ128rkz, X86::VPSHLDVWZ128mkz, 0 },
+ { X86::VPSHRDVDZ128rkz, X86::VPSHRDVDZ128mkz, 0 },
+ { X86::VPSHRDVQZ128rkz, X86::VPSHRDVQZ128mkz, 0 },
+ { X86::VPSHRDVWZ128rkz, X86::VPSHRDVWZ128mkz, 0 },
{ X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
{ X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
};
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=334898&r1=334897&r2=334898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sat Jun 16 16:25:50 2018
@@ -876,7 +876,8 @@ let AddedComplexity = 20, Predicates = [
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
- VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
+ VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
+ NotMemoryFoldable;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -891,7 +892,7 @@ let Constraints = "$src1 = $dst", AddedC
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
- Sched<[SchedWriteFShuffle.XMM]>;
+ Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}
//===----------------------------------------------------------------------===//
@@ -1901,11 +1902,11 @@ multiclass sse12_cmp_scalar<RegisterClas
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched]>;
+ Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, []>,
- Sched<[sched.Folded, ReadAfterLd]>;
+ Sched<[sched.Folded, ReadAfterLd]>, NotMemoryFoldable;
}
}
@@ -2078,11 +2079,12 @@ multiclass sse12_cmp_packed<RegisterClas
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched]>;
+ asm_alt, [], d>, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def rmi_alt : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc),
- asm_alt, [], d>, Sched<[sched.Folded, ReadAfterLd]>;
+ asm_alt, [], d>, Sched<[sched.Folded, ReadAfterLd]>,
+ NotMemoryFoldable;
}
}
Modified: llvm/trunk/lib/Target/X86/X86InstrXOP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrXOP.td?rev=334898&r1=334897&r2=334898&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrXOP.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrXOP.td Sat Jun 16 16:25:50 2018
@@ -268,13 +268,14 @@ multiclass xopvpcom<bits<8> opc, string
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched]>;
+ []>, XOP_4V, Sched<[sched]>, NotMemoryFoldable;
let mayLoad = 1 in
def mi_alt : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
!strconcat("vpcom", Suffix,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>;
+ []>, XOP_4V, Sched<[sched.Folded, ReadAfterLd]>,
+ NotMemoryFoldable;
}
}
Modified: llvm/trunk/utils/TableGen/X86FoldTablesEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/X86FoldTablesEmitter.cpp?rev=334898&r1=334897&r2=334898&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/X86FoldTablesEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/X86FoldTablesEmitter.cpp Sat Jun 16 16:25:50 2018
@@ -47,7 +47,9 @@ const char *ExplicitAlign[] = {"MOVDQA",
"MOVNTPD", "MOVNTDQ", "MOVNTDQA"};
// List of instructions NOT requiring explicit memory alignment.
-const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD"};
+const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
+ "PCMPESTRM", "PCMPESTRI",
+ "PCMPISTRM", "PCMPISTRI" };
// For manually mapping instructions that do not match by their encoding.
const ManualMapEntry ManualMapSet[] = {
@@ -63,9 +65,9 @@ const ManualMapEntry ManualMapSet[] = {
{ "ADD16rr_DB", "ADD16rm", NO_UNFOLD },
{ "ADD32rr_DB", "ADD32rm", NO_UNFOLD },
{ "ADD64rr_DB", "ADD64rm", NO_UNFOLD },
- { "PUSH16r", "PUSH16rmm", NO_UNFOLD },
- { "PUSH32r", "PUSH32rmm", NO_UNFOLD },
- { "PUSH64r", "PUSH64rmm", NO_UNFOLD },
+ { "PUSH16r", "PUSH16rmm", UNFOLD },
+ { "PUSH32r", "PUSH32rmm", UNFOLD },
+ { "PUSH64r", "PUSH64rmm", UNFOLD },
{ "TAILJMPr", "TAILJMPm", UNFOLD },
{ "TAILJMPr64", "TAILJMPm64", UNFOLD },
{ "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD },
More information about the llvm-commits mailing list