[llvm] r330714 - [X86] Add vector element insertion/extraction scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 24 06:21:41 PDT 2018
Author: rksimon
Date: Tue Apr 24 06:21:41 2018
New Revision: 330714
URL: http://llvm.org/viewvc/llvm-project?rev=330714&view=rev
Log:
[X86] Add vector element insertion/extraction scheduler classes
Split off pinsr/pextr and extractps instructions.
(Mostly) fixes PR36887.
Note: It might be worth adding a WriteFInsertLd class as well in the future.
Differential Revision: https://reviews.llvm.org/D45929
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrMMX.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
llvm/trunk/lib/Target/X86/X86SchedHaswell.td
llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
llvm/trunk/lib/Target/X86/X86Schedule.td
llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s
llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Apr 24 06:21:41 2018
@@ -1085,14 +1085,14 @@ def VEXTRACTPSZrr : AVX512AIi8<0x17, MRM
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
- EVEX, VEX_WIG, Sched<[WriteFBlend]>;
+ EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
(ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>,
- EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFBlendLd, WriteRMW]>;
+ EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
@@ -9878,7 +9878,7 @@ multiclass avx512_extract_elt_bw_m<bits<
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
addr:$dst)]>,
- EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, WriteRMW]>;
+ EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
}
multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
@@ -9888,7 +9888,7 @@ multiclass avx512_extract_elt_b<string O
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
- EVEX, TAPD, Sched<[WriteShuffle]>;
+ EVEX, TAPD, Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
}
@@ -9901,14 +9901,14 @@ multiclass avx512_extract_elt_w<string O
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst,
(X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
- EVEX, PD, Sched<[WriteShuffle]>;
+ EVEX, PD, Sched<[WriteVecExtract]>;
let hasSideEffects = 0 in
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
EVEX, TAPD, FoldGenData<NAME#rr>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
}
@@ -9922,7 +9922,7 @@ multiclass avx512_extract_elt_dq<string
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GRC:$dst,
(extractelt (_.VT _.RC:$src1), imm:$src2))]>,
- EVEX, TAPD, Sched<[WriteShuffle]>;
+ EVEX, TAPD, Sched<[WriteVecExtract]>;
def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
@@ -9930,7 +9930,7 @@ multiclass avx512_extract_elt_dq<string
[(store (extractelt (_.VT _.RC:$src1),
imm:$src2),addr:$dst)]>,
EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
- Sched<[WriteShuffleLd, WriteRMW]>;
+ Sched<[WriteVecExtractSt]>;
}
}
@@ -9946,7 +9946,7 @@ multiclass avx512_insert_elt_m<bits<8> o
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
- EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -9957,7 +9957,7 @@ multiclass avx512_insert_elt_bw<bits<8>
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
}
@@ -9971,7 +9971,7 @@ multiclass avx512_insert_elt_dq<bits<8>
OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set _.RC:$dst,
(_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
- EVEX_4V, TAPD, Sched<[WriteShuffle]>;
+ EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
_.ScalarLdFrag>, TAPD;
Modified: llvm/trunk/lib/Target/X86/X86InstrMMX.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrMMX.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrMMX.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrMMX.td Tue Apr 24 06:21:41 2018
@@ -528,7 +528,7 @@ def MMX_PEXTRWrr: MMXIi8<0xC5, MRMSrcReg
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (int_x86_mmx_pextr_w VR64:$src1,
imm:$src2))]>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecExtract]>;
let Constraints = "$src1 = $dst" in {
let Predicates = [HasSSE1] in {
def MMX_PINSRWrr : MMXIi8<0xC4, MRMSrcReg,
@@ -537,7 +537,7 @@ let Predicates = [HasSSE1] in {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
GR32orGR64:$src2, imm:$src3))]>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecInsert]>;
def MMX_PINSRWrm : MMXIi8<0xC4, MRMSrcMem,
(outs VR64:$dst),
@@ -546,7 +546,7 @@ let Predicates = [HasSSE1] in {
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
(i32 (anyext (loadi16 addr:$src2))),
imm:$src3))]>,
- Sched<[WriteShuffleLd, ReadAfterLd]>;
+ Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
}
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Apr 24 06:21:41 2018
@@ -3782,7 +3782,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1>
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecInsert]>;
def rm : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, u8imm:$src3),
@@ -3792,7 +3792,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1>
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
imm:$src3))]>,
- Sched<[WriteShuffleLd, ReadAfterLd]>;
+ Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
// Extract
@@ -3802,13 +3802,13 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>,
- PD, VEX, Sched<[WriteShuffle]>;
+ PD, VEX, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecExtract]>;
// Insert
let Predicates = [HasAVX, NoBWI] in
@@ -5085,15 +5085,14 @@ multiclass SS41I_extract8<bits<8> opc, s
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
imm:$src2))]>,
- Sched<[WriteShuffle]>;
- let hasSideEffects = 0, mayStore = 1,
- SchedRW = [WriteShuffleLd, WriteRMW] in
+ Sched<[WriteVecExtract]>;
+ let hasSideEffects = 0, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))),
- addr:$dst)]>;
+ addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoBWI] in
@@ -5109,16 +5108,15 @@ multiclass SS41I_extract16<bits<8> opc,
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- Sched<[WriteShuffle]>, FoldGenData<NAME#ri>;
+ Sched<[WriteVecExtract]>, FoldGenData<NAME#ri>;
- let hasSideEffects = 0, mayStore = 1,
- SchedRW = [WriteShuffleLd, WriteRMW] in
+ let hasSideEffects = 0, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))),
- addr:$dst)]>;
+ addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoBWI] in
@@ -5135,14 +5133,13 @@ multiclass SS41I_extract32<bits<8> opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst,
(extractelt (v4i32 VR128:$src1), imm:$src2))]>,
- Sched<[WriteShuffle]>;
- let SchedRW = [WriteShuffleLd, WriteRMW] in
+ Sched<[WriteVecExtract]>;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
- addr:$dst)]>;
+ addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5158,14 +5155,13 @@ multiclass SS41I_extract64<bits<8> opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR64:$dst,
(extractelt (v2i64 VR128:$src1), imm:$src2))]>,
- Sched<[WriteShuffle]>;
- let SchedRW = [WriteShuffleLd, WriteRMW] in
+ Sched<[WriteVecExtract]>;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i64mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (v2i64 VR128:$src1), imm:$src2),
- addr:$dst)]>;
+ addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5182,14 +5178,13 @@ multiclass SS41I_extractf32<bits<8> opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32orGR64:$dst,
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
- Sched<[WriteFBlend]>;
- let SchedRW = [WriteFBlendLd, WriteRMW] in
+ Sched<[WriteVecExtract]>;
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins f32mem:$dst, VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
- addr:$dst)]>;
+ addr:$dst)]>, Sched<[WriteVecExtractSt]>;
}
let ExeDomain = SSEPackedSingle in {
@@ -5223,7 +5218,7 @@ multiclass SS41I_insert8<bits<8> opc, st
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecInsert]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i8mem:$src2, u8imm:$src3),
!if(Is2Addr,
@@ -5232,7 +5227,7 @@ multiclass SS41I_insert8<bits<8> opc, st
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ imm:$src3))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoBWI] in
@@ -5249,7 +5244,7 @@ multiclass SS41I_insert32<bits<8> opc, s
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecInsert]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i32mem:$src2, u8imm:$src3),
!if(Is2Addr,
@@ -5258,7 +5253,7 @@ multiclass SS41I_insert32<bits<8> opc, s
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoDQI] in
@@ -5275,7 +5270,7 @@ multiclass SS41I_insert64<bits<8> opc, s
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
- Sched<[WriteShuffle]>;
+ Sched<[WriteVecInsert]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i64mem:$src2, u8imm:$src3),
!if(Is2Addr,
@@ -5284,7 +5279,7 @@ multiclass SS41I_insert64<bits<8> opc, s
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
+ imm:$src3)))]>, Sched<[WriteVecInsertLd, ReadAfterLd]>;
}
let Predicates = [HasAVX, NoDQI] in
Modified: llvm/trunk/lib/Target/X86/X86SchedBroadwell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedBroadwell.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedBroadwell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedBroadwell.td Tue Apr 24 06:21:41 2018
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
def BroadwellModel : SchedMachineModel {
- // All x86 instructions are modeled as a single micro-op, and HW can decode 4
+ // All x86 instructions are modeled as a single micro-op, and BW can decode 4
// instructions per cycle.
let IssueWidth = 4;
let MicroOpBufferSize = 192; // Based on the reorder buffer.
@@ -190,6 +190,26 @@ defm : BWWriteResPair<WriteVarBlend, [B
defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD.
defm : BWWriteResPair<WritePSADBW, [BWPort0], 5>; // Vector PSADBW.
+// Vector insert/extract operations.
+def : WriteRes<WriteVecInsert, [BWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVecInsertLd, [BWPort5,BWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def : WriteRes<WriteVecExtract, [BWPort0,BWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
// Conversion between integer and float.
defm : BWWriteResPair<WriteCvtF2I, [BWPort1], 3>; // Float -> Integer.
defm : BWWriteResPair<WriteCvtI2F, [BWPort1], 4>; // Integer -> Float.
@@ -462,17 +482,6 @@ def: InstRW<[BWWriteResGroup10], (instre
"(V?)MOVUPD(Y?)mr",
"(V?)MOVUPS(Y?)mr")>;
-def BWWriteResGroup11 : SchedWriteRes<[BWPort5]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWrr",
- "(V?)PINSRBrr",
- "(V?)PINSRDrr",
- "(V?)PINSRQrr",
- "(V?)PINSRWrr")>;
-
def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -505,15 +514,9 @@ def BWWriteResGroup15 : SchedWriteRes<[B
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[BWWriteResGroup15], (instregex "MMX_PEXTRWrr",
- "VCVTPH2PS(Y?)rr",
+def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PS(Y?)rr",
"(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr",
- "(V?)EXTRACTPSrr",
- "(V?)PEXTRBrr",
- "(V?)PEXTRDrr",
- "(V?)PEXTRQrr",
- "(V?)PEXTRWrr",
"(V?)PSLLDrr",
"(V?)PSLLQrr",
"(V?)PSLLWrr",
@@ -573,17 +576,6 @@ def: InstRW<[BWWriteResGroup20], (instre
"SBB8ri",
"SET(A|BE)r")>;
-def BWWriteResGroup21 : SchedWriteRes<[BWPort4,BWPort5,BWPort237]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[BWWriteResGroup21], (instregex "(V?)EXTRACTPSmr",
- "(V?)PEXTRBmr",
- "(V?)PEXTRDmr",
- "(V?)PEXTRQmr",
- "(V?)PEXTRWmr")>;
-
def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedHaswell.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedHaswell.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedHaswell.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedHaswell.td Tue Apr 24 06:21:41 2018
@@ -189,6 +189,26 @@ defm : HWWriteResPair<WriteVarVecShift,
defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>;
defm : HWWriteResPair<WritePSADBW, [HWPort0], 5>;
+// Vector insert/extract operations.
+def : WriteRes<WriteVecInsert, [HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVecInsertLd, [HWPort5,HWPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def : WriteRes<WriteVecExtract, [HWPort0,HWPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecExtractSt, [HWPort4,HWPort5,HWPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
@@ -1092,17 +1112,6 @@ def HWWriteResGroup19 : SchedWriteRes<[H
}
def: InstRW<[HWWriteResGroup19], (instregex "SFENCE")>;
-def HWWriteResGroup20 : SchedWriteRes<[HWPort4,HWPort5,HWPort237]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[HWWriteResGroup20], (instregex "(V?)EXTRACTPSmr",
- "(V?)PEXTRBmr",
- "(V?)PEXTRDmr",
- "(V?)PEXTRQmr",
- "(V?)PEXTRWmr")>;
-
def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> {
let Latency = 2;
let NumMicroOps = 3;
@@ -1160,17 +1169,6 @@ def HWWriteResGroup26 : SchedWriteRes<[H
def: InstRW<[HWWriteResGroup26], (instregex "POP(16|32|64)rmm",
"PUSH(16|32|64)rmm")>;
-def HWWriteResGroup27 : SchedWriteRes<[HWPort5]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [2];
-}
-def: InstRW<[HWWriteResGroup27], (instregex "MMX_PINSRWrr",
- "(V?)PINSRBrr",
- "(V?)PINSRDrr",
- "(V?)PINSRQrr",
- "(V?)PINSRWrr")>;
-
def HWWriteResGroup28 : SchedWriteRes<[HWPort01]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -1203,16 +1201,10 @@ def HWWriteResGroup31 : SchedWriteRes<[H
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[HWWriteResGroup31], (instregex "MMX_PEXTRWrr",
- "VCVTPH2PSYrr",
+def: InstRW<[HWWriteResGroup31], (instregex "VCVTPH2PSYrr",
"VCVTPH2PSrr",
"(V?)CVTPS2PDrr",
"(V?)CVTSS2SDrr",
- "(V?)EXTRACTPSrr",
- "(V?)PEXTRBrr",
- "(V?)PEXTRDrr",
- "(V?)PEXTRQrr",
- "(V?)PEXTRWrr",
"(V?)PSLLDrr",
"(V?)PSLLQrr",
"(V?)PSLLWrr",
Modified: llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSandyBridge.td Tue Apr 24 06:21:41 2018
@@ -173,6 +173,25 @@ defm : SBWriteResPair<WriteVarBlend, [SB
defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>;
defm : SBWriteResPair<WritePSADBW, [SBPort0], 5>;
+// Vector insert/extract operations.
+def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecInsertLd, [SBPort23,SBPort15]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def : WriteRes<WriteVecExtract, [SBPort0,SBPort15]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
@@ -535,16 +554,6 @@ def SBWriteResGroup16_1 : SchedWriteRes<
}
def: InstRW<[SBWriteResGroup16_1], (instrs BSWAP32r)>;
-def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> {
- let Latency = 2;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup17], (instregex "(V?)PINSRBrr",
- "(V?)PINSRDrr",
- "(V?)PINSRQrr",
- "(V?)PINSRWrr")>;
-
def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> {
let Latency = 2;
let NumMicroOps = 2;
@@ -590,16 +599,6 @@ def SBWriteResGroup22 : SchedWriteRes<[S
}
def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;
-def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> {
- let Latency = 3;
- let NumMicroOps = 2;
- let ResourceCycles = [1,1];
-}
-def: InstRW<[SBWriteResGroup23], (instregex "(V?)PEXTRBrr",
- "(V?)PEXTRDrr",
- "(V?)PEXTRQrr",
- "(V?)PEXTRWrr")>;
-
def SBWriteResGroup23_2 : SchedWriteRes<[SBPort05]> {
let Latency = 3;
let NumMicroOps = 3;
@@ -793,15 +792,6 @@ def SBWriteResGroup37 : SchedWriteRes<[S
def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr",
"VMASKMOVPS(Y?)mr")>;
-def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
- let Latency = 5;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SBWriteResGroup39], (instregex "(V?)PEXTRBmr",
- "VPEXTRDmr",
- "VPEXTRWmr")>;
-
def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
let Latency = 5;
let NumMicroOps = 3;
@@ -1009,10 +999,6 @@ def: InstRW<[SBWriteResGroup59], (instre
"(V?)PCMPGTBrm",
"(V?)PCMPGTDrm",
"(V?)PCMPGTWrm",
- "(V?)PINSRBrm",
- "(V?)PINSRDrm",
- "(V?)PINSRQrm",
- "(V?)PINSRWrm",
"(V?)PMAXSBrm",
"(V?)PMAXSDrm",
"(V?)PMAXSWrm",
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeClient.td Tue Apr 24 06:21:41 2018
@@ -187,6 +187,26 @@ defm : SKLWriteResPair<WriteVarBlend, [
defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3>; // Vector PSADBW.
+// Vector insert/extract operations.
+def : WriteRes<WriteVecInsert, [SKLPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVecInsertLd, [SKLPort5,SKLPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def : WriteRes<WriteVecExtract, [SKLPort0,SKLPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecExtractSt, [SKLPort4,SKLPort5,SKLPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
// Conversion between integer and float.
defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer.
defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float.
@@ -571,12 +591,7 @@ def SKLWriteResGroup13 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr",
- "MMX_PINSRWrr",
- "(V?)PINSRBrr",
- "(V?)PINSRDrr",
- "(V?)PINSRQrr",
- "(V?)PINSRWrr")>;
+def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> {
let Latency = 2;
@@ -671,17 +686,6 @@ def: InstRW<[SKLWriteResGroup23], (instr
"SBB8i8",
"SBB8ri")>;
-def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup24], (instregex "(V?)EXTRACTPSmr",
- "(V?)PEXTRBmr",
- "(V?)PEXTRDmr",
- "(V?)PEXTRQmr",
- "(V?)PEXTRWmr")>;
-
def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
let Latency = 2;
let NumMicroOps = 3;
@@ -761,13 +765,7 @@ def SKLWriteResGroup31 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PEXTRWrr",
- "(V?)EXTRACTPSrr",
- "(V?)PEXTRBrr",
- "(V?)PEXTRDrr",
- "(V?)PEXTRQrr",
- "(V?)PEXTRWrr",
- "(V?)PTEST(Y?)rr")>;
+def: InstRW<[SKLWriteResGroup31], (instregex "(V?)PTEST(Y?)rr")>;
def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
let Latency = 3;
Modified: llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td (original)
+++ llvm/trunk/lib/Target/X86/X86SchedSkylakeServer.td Tue Apr 24 06:21:41 2018
@@ -187,6 +187,26 @@ defm : SKXWriteResPair<WriteVarBlend, [S
defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD.
defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1,1], 1, 6>; // Vector PSADBW.
+// Vector insert/extract operations.
+def : WriteRes<WriteVecInsert, [SKXPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVecInsertLd, [SKXPort5,SKXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def : WriteRes<WriteVecExtract, [SKXPort0,SKXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
// Conversion between integer and float.
defm : SKXWriteResPair<WriteCvtF2I, [SKXPort1], 3>; // Float -> Integer.
defm : SKXWriteResPair<WriteCvtI2F, [SKXPort1], 4>; // Integer -> Float.
@@ -1035,20 +1055,7 @@ def SKXWriteResGroup13 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [2];
}
-def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr",
- "MMX_PINSRWrr",
- "PINSRBrr",
- "PINSRDrr",
- "PINSRQrr",
- "PINSRWrr",
- "VPINSRBZrr",
- "VPINSRBrr",
- "VPINSRDZrr",
- "VPINSRDrr",
- "VPINSRQZrr",
- "VPINSRQrr",
- "VPINSRWZrr",
- "VPINSRWrr")>;
+def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>;
def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> {
let Latency = 2;
@@ -1163,27 +1170,6 @@ def: InstRW<[SKXWriteResGroup23], (instr
"SBB8i8",
"SBB8ri")>;
-def SKXWriteResGroup24 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
- let Latency = 2;
- let NumMicroOps = 3;
- let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKXWriteResGroup24], (instregex "EXTRACTPSmr",
- "PEXTRBmr",
- "PEXTRDmr",
- "PEXTRQmr",
- "PEXTRWmr",
- "VEXTRACTPSZmr(b?)",
- "VEXTRACTPSmr",
- "VPEXTRBZmr(b?)",
- "VPEXTRBmr",
- "VPEXTRDZmr(b?)",
- "VPEXTRDmr",
- "VPEXTRQZmr(b?)",
- "VPEXTRQmr",
- "VPEXTRWZmr(b?)",
- "VPEXTRWmr")>;
-
def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> {
let Latency = 2;
let NumMicroOps = 3;
@@ -1455,25 +1441,7 @@ def SKXWriteResGroup33 : SchedWriteRes<[
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SKXWriteResGroup33], (instregex "EXTRACTPSrr",
- "MMX_PEXTRWrr",
- "PEXTRBrr",
- "PEXTRDrr",
- "PEXTRQrr",
- "PEXTRWrr",
- "PTESTrr",
- "VEXTRACTPSZrr",
- "VEXTRACTPSrr",
- "VPEXTRBZrr",
- "VPEXTRBrr",
- "VPEXTRDZrr",
- "VPEXTRDrr",
- "VPEXTRQZrr",
- "VPEXTRQrr",
- "VPEXTRWZrr",
- "VPEXTRWrr",
- "VPTESTYrr",
- "VPTESTrr")>;
+def: InstRW<[SKXWriteResGroup33], (instregex "(V?)PTEST(Y?)rr")>;
def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> {
let Latency = 3;
Modified: llvm/trunk/lib/Target/X86/X86Schedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Schedule.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Schedule.td (original)
+++ llvm/trunk/lib/Target/X86/X86Schedule.td Tue Apr 24 06:21:41 2018
@@ -117,6 +117,11 @@ defm WriteVarBlend : X86SchedWritePair;
defm WritePSADBW : X86SchedWritePair; // Vector PSADBW.
defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD.
+// Vector insert/extract operations.
+defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
+def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
+def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
+
// MOVMSK operations.
def WriteFMOVMSK : SchedWrite;
def WriteVecMOVMSK : SchedWrite;
Modified: llvm/trunk/lib/Target/X86/X86ScheduleAtom.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleAtom.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleAtom.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleAtom.td Tue Apr 24 06:21:41 2018
@@ -252,6 +252,14 @@ defm : AtomWriteResPair<WriteVarShuffle2
defm : AtomWriteResPair<WriteVarVecShift, [AtomPort0], [AtomPort0]>; // NOTE: Doesn't exist on Atom.
////////////////////////////////////////////////////////////////////////////////
+// Vector insert/extract operations.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : AtomWriteResPair<WriteVecInsert, [AtomPort0], [AtomPort0], 1, 1>;
+def : WriteRes<WriteVecExtract, [AtomPort0]>;
+def : WriteRes<WriteVecExtractSt, [AtomPort0]>;
+
+////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
////////////////////////////////////////////////////////////////////////////////
Modified: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td Tue Apr 24 06:21:41 2018
@@ -385,23 +385,12 @@ defm : JWriteResFpuPair<WriteVarShuffle2
defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar.
////////////////////////////////////////////////////////////////////////////////
-// Vector Extraction instructions.
+// Vector insert/extract operations.
////////////////////////////////////////////////////////////////////////////////
-def JWritePEXTR : SchedWriteRes<[JFPU0, JFPA, JALU0]> { let Latency = 3; }
-def : InstRW<[JWritePEXTR], (instrs MMX_PEXTRWrr,
- EXTRACTPSrr, VEXTRACTPSrr,
- PEXTRBrr, VPEXTRBrr,
- PEXTRDrr, VPEXTRDrr,
- PEXTRQrr, VPEXTRQrr,
- PEXTRWrr, VPEXTRWrr, PEXTRWrr_REV, VPEXTRWrr_REV)>;
-
-def JWritePEXTRSt : SchedWriteRes<[JFPU1, JSTC, JSAGU]> { let Latency = 3; }
-def : InstRW<[JWritePEXTRSt], (instrs EXTRACTPSmr, VEXTRACTPSmr,
- PEXTRBmr, VPEXTRBmr,
- PEXTRDmr, VPEXTRDmr,
- PEXTRQmr, VPEXTRQmr,
- PEXTRWmr, VPEXTRWmr)>;
+defm : JWriteResFpuPair<WriteVecInsert, [JFPU01, JVALU], 1>;
+def : WriteRes<WriteVecExtract, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
+def : WriteRes<WriteVecExtractSt, [JFPU1, JSTC, JSAGU]> { let Latency = 3; }
////////////////////////////////////////////////////////////////////////////////
// SSE42 String instructions.
Modified: llvm/trunk/lib/Target/X86/X86ScheduleSLM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleSLM.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleSLM.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleSLM.td Tue Apr 24 06:21:41 2018
@@ -164,6 +164,16 @@ defm : SLMWriteResPair<WriteBlend, [SLM
defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
+// Vector insert/extract operations.
+defm : SLMWriteResPair<WriteVecInsert, [SLM_FPC_RSV0], 1>;
+
+def : WriteRes<WriteVecExtract, [SLM_FPC_RSV0]>;
+def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 2];
+}
+
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
Modified: llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td (original)
+++ llvm/trunk/lib/Target/X86/X86ScheduleZnver1.td Tue Apr 24 06:21:41 2018
@@ -233,6 +233,19 @@ defm : ZnWriteResFpuPair<WritePSADBW,
// Vector Shift Operations
defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
+// Vector insert/extract operations.
+defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
+
+def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
+ let Latency = 2;
+ let ResourceCycles = [1, 2];
+}
+def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1, 2, 3];
+}
+
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
@@ -987,22 +1000,6 @@ def ZnWritePMOVMSKBY : SchedWriteRes<[Zn
}
def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>;
-// PEXTR B/W/D/Q.
-// r32,x,i.
-def ZnWritePEXTRr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
- let Latency = 2;
- let ResourceCycles = [1, 2];
-}
-def : InstRW<[ZnWritePEXTRr], (instregex "(V?)PEXTR(B|W|D|Q)rr", "MMX_PEXTRWrr")>;
-
-def ZnWritePEXTRm : SchedWriteRes<[ZnAGU, ZnFPU12, ZnFPU2]> {
- let Latency = 5;
- let NumMicroOps = 2;
- let ResourceCycles = [1, 2, 3];
-}
-// m8,x,i.
-def : InstRW<[ZnWritePEXTRm], (instregex "(V?)PEXTR(B|W|D|Q)mr")>;
-
// VPBROADCAST B/W.
// x, m8/16.
def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
Modified: llvm/trunk/test/CodeGen/X86/mmx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-schedule.ll?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-schedule.ll Tue Apr 24 06:21:41 2018
@@ -2978,7 +2978,7 @@ declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x
define i32 @test_pextrw(x86_mmx %a0) optsize {
; GENERIC-LABEL: test_pextrw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00]
+; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pextrw:
@@ -2993,7 +2993,7 @@ define i32 @test_pextrw(x86_mmx %a0) opt
;
; SANDY-LABEL: test_pextrw:
; SANDY: # %bb.0:
-; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00]
+; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pextrw:
@@ -3501,9 +3501,9 @@ declare x86_mmx @llvm.x86.ssse3.phsub.w(
define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
; GENERIC-LABEL: test_pinsrw:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
+; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
+; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3525,9 +3525,9 @@ define i64 @test_pinsrw(x86_mmx %a0, i32
;
; SANDY-LABEL: test_pinsrw:
; SANDY: # %bb.0:
-; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
+; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
+; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Tue Apr 24 06:21:41 2018
@@ -1903,7 +1903,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i
; GENERIC-LABEL: test_pextrw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
+; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SLM-LABEL: test_pextrw:
@@ -1915,7 +1915,7 @@ define i32 @test_pextrw(<8 x i16> %a0, i
; SANDY-SSE-LABEL: test_pextrw:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
+; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pextrw:
Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse1.s Tue Apr 24 06:21:41 2018
@@ -268,9 +268,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: 2 8 1.00 * pavgb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pavgw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pavgw (%rax), %mm2
-# CHECK-NEXT: 1 1 1.00 pextrw $1, %mm0, %ecx
-# CHECK-NEXT: 1 1 1.00 pinsrw $1, %eax, %mm2
-# CHECK-NEXT: 2 6 1.00 * pinsrw $1, (%rax), %mm2
+# CHECK-NEXT: 2 3 1.00 pextrw $1, %mm0, %ecx
+# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %mm2
+# CHECK-NEXT: 2 7 0.50 * pinsrw $1, (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pmaxsw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pmaxsw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pmaxub %mm0, %mm2
@@ -331,7 +331,7 @@ xorps (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - 112.00 40.00 54.00 10.00 35.00 33.50 33.50
+# CHECK-NEXT: - 112.00 41.00 55.50 10.00 34.50 33.50 33.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -409,9 +409,9 @@ xorps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pavgw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgw (%rax), %mm2
-# CHECK-NEXT: - - - - - 1.00 - - pextrw $1, %mm0, %ecx
-# CHECK-NEXT: - - - - - 1.00 - - pinsrw $1, %eax, %mm2
-# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pinsrw $1, (%rax), %mm2
+# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %mm0, %ecx
+# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrw $1, %eax, %mm2
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrw $1, (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pmaxsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxsw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pmaxub %mm0, %mm2
Modified: llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s?rev=330714&r1=330713&r2=330714&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/SandyBridge/resources-sse41.s Tue Apr 24 06:21:41 2018
@@ -188,7 +188,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 4 5 1.00 * pextrd $1, %xmm0, (%rax)
# CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx
# CHECK-NEXT: 4 5 1.00 * pextrq $1, %xmm0, (%rax)
-# CHECK-NEXT: 3 6 1.00 * pextrw $1, %xmm0, (%rax)
+# CHECK-NEXT: 3 5 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * phminposuw (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 pinsrb $1, %eax, %xmm1
@@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - - 26.00 47.00 5.00 53.00 25.00 25.00
+# CHECK-NEXT: - - 26.00 47.50 5.00 52.50 24.50 24.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
@@ -301,7 +301,7 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrd $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrq $1, %xmm0, %rcx
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrq $1, %xmm0, (%rax)
-# CHECK-NEXT: - - - - 1.00 1.00 1.00 1.00 pextrw $1, %xmm0, (%rax)
+# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - - phminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 phminposuw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrb $1, %eax, %xmm1
More information about the llvm-commits mailing list