[PATCH] D66801: [X86][BtVer2] Fix latency and throughput of conditional SIMD store instructions.
Roman Lebedev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 31 05:54:40 PDT 2019
lebedev.ri added a comment.
FWIW, this does not *appear* to be the case on BdVer2:
$ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPSmr
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-072799.o
---
mode: uops
key:
instructions:
- 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x0 %noreg XMM6 XMM11'
- 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x40 %noreg XMM4 XMM9'
- 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x80 %noreg XMM12 XMM12'
- 'VMASKMOVPSmr RDI i_0x1 %noreg i_0xc0 %noreg XMM6 XMM2'
- 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x100 %noreg XMM1 XMM7'
- 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x140 %noreg XMM10 XMM15'
config: ''
register_initial_values:
- 'XMM6=0x0'
- 'XMM11=0x0'
- 'XMM4=0x0'
- 'XMM9=0x0'
- 'XMM12=0x0'
- 'XMM2=0x0'
- 'XMM1=0x0'
- 'XMM7=0x0'
- 'XMM10=0x0'
- 'XMM15=0x0'
cpu_name: bdver2
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: PdFPU0, value: 8.0055, per_snippet_value: 48.033 }
- { key: PdFPU1, value: 4.0124, per_snippet_value: 24.0744 }
- { key: PdFPU2, value: 2.0042, per_snippet_value: 12.0252 }
- { key: PdFPU3, value: 4.0078, per_snippet_value: 24.0468 }
- { key: NumMicroOps, value: 18.0142, per_snippet_value: 108.085 }
error: ''
info: instruction is parallel, repeating a random one.
assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C410C462492E1FC462592E4F40C462192EA780000000C4E2492E97C0000000C4E2712EBF00010000C462292EBF40010000C462492E1FC462592E4F40C462192EA780000000C4E2492E97C0000000C4E2712EBF00010000C462292EBF40010000C462492E1FC462592E4F40C462192EA780000000C4E2492E97C0000000C3
...
$ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPDmr
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-28613e.o
---
mode: uops
key:
instructions:
- 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x0 %noreg XMM8 XMM7'
- 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x40 %noreg XMM14 XMM0'
- 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x80 %noreg XMM11 XMM5'
- 'VMASKMOVPDmr RDI i_0x1 %noreg i_0xc0 %noreg XMM4 XMM11'
- 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x100 %noreg XMM12 XMM11'
- 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x140 %noreg XMM4 XMM0'
config: ''
register_initial_values:
- 'XMM8=0x0'
- 'XMM7=0x0'
- 'XMM14=0x0'
- 'XMM0=0x0'
- 'XMM11=0x0'
- 'XMM5=0x0'
- 'XMM4=0x0'
- 'XMM12=0x0'
cpu_name: bdver2
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: PdFPU0, value: 7.9896, per_snippet_value: 47.9376 }
- { key: PdFPU1, value: 4.0235, per_snippet_value: 24.141 }
- { key: PdFPU2, value: 2.0042, per_snippet_value: 12.0252 }
- { key: PdFPU3, value: 4.0077, per_snippet_value: 24.0462 }
- { key: NumMicroOps, value: 18.0128, per_snippet_value: 108.077 }
error: ''
info: instruction is parallel, repeating a random one.
assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C410C4E2392F3FC4E2092F4740C4E2212FAF80000000C462592F9FC0000000C462192F9F00010000C4E2592F8740010000C4E2392F3FC4E2092F4740C4E2212FAF80000000C462592F9FC0000000C462192F9F00010000C4E2592F8740010000C4E2392F3FC4E2092F4740C4E2212FAF80000000C462592F9FC0000000C3
...
$ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPSYmr
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-f26721.o
---
mode: uops
key:
instructions:
- 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x0 %noreg YMM5 YMM4'
- 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x40 %noreg YMM2 YMM0'
- 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x80 %noreg YMM15 YMM14'
- 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0xc0 %noreg YMM10 YMM13'
- 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x100 %noreg YMM7 YMM15'
- 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x140 %noreg YMM15 YMM5'
config: ''
register_initial_values:
- 'YMM5=0x0'
- 'YMM4=0x0'
- 'YMM2=0x0'
- 'YMM0=0x0'
- 'YMM15=0x0'
- 'YMM14=0x0'
- 'YMM10=0x0'
- 'YMM13=0x0'
- 'YMM7=0x0'
cpu_name: bdver2
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: PdFPU0, value: 15.9929, per_snippet_value: 95.9574 }
- { key: PdFPU1, value: 8.089, per_snippet_value: 48.534 }
- { key: PdFPU2, value: 2.0012, per_snippet_value: 12.0072 }
- { key: PdFPU3, value: 8.0068, per_snippet_value: 48.0408 }
- { key: NumMicroOps, value: 34.018, per_snippet_value: 204.108 }
error: ''
info: instruction is parallel, repeating a random one.
assembled_snippet: 4883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F2C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F24244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F14244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F04244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F3C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F34244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F14244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F2C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F3C244883C420C4E2552E27C4E26D2E4740C462052EB780000000C4622D2EAFC0000000C462452EBF00010000C4E2052EAF40010000C4E2552E27C4E26D2E4740C462052EB780000000C4622D2EAFC0000000C462452EBF00010000C4E2052EAF40010000C4E2552E27C4E26D2E4740C462052EB780000000C4622D2EAFC0000000C3
...
$ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPDYmr
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-e45324.o
---
mode: uops
key:
instructions:
- 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x0 %noreg YMM15 YMM5'
- 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x40 %noreg YMM9 YMM10'
- 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x80 %noreg YMM10 YMM7'
- 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0xc0 %noreg YMM1 YMM8'
- 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x100 %noreg YMM10 YMM10'
- 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x140 %noreg YMM13 YMM9'
config: ''
register_initial_values:
- 'YMM15=0x0'
- 'YMM5=0x0'
- 'YMM9=0x0'
- 'YMM10=0x0'
- 'YMM7=0x0'
- 'YMM1=0x0'
- 'YMM8=0x0'
- 'YMM13=0x0'
cpu_name: bdver2
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: PdFPU0, value: 16.0013, per_snippet_value: 96.0078 }
- { key: PdFPU1, value: 8.0093, per_snippet_value: 48.0558 }
- { key: PdFPU2, value: 2.0018, per_snippet_value: 12.0108 }
- { key: PdFPU3, value: 8.0068, per_snippet_value: 48.0408 }
- { key: NumMicroOps, value: 34.0168, per_snippet_value: 204.101 }
error: ''
info: instruction is parallel, repeating a random one.
assembled_snippet: 4883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F3C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F2C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F0C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F14244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F3C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F0C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F04244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F2C244883C420C4E2052F2FC462352F5740C4E22D2FBF80000000C462752F87C0000000C4622D2F9700010000C462152F8F40010000C4E2052F2FC462352F5740C4E22D2FBF80000000C462752F87C0000000C4622D2F9700010000C462152F8F40010000C4E2052F2FC462352F5740C4E22D2FBF80000000C462752F87C0000000C3
...
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D66801/new/
https://reviews.llvm.org/D66801
More information about the llvm-commits mailing list