[PATCH] D66801: [X86][BtVer2] Fix latency and throughput of conditional SIMD store instructions.

Roman Lebedev via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 31 05:54:40 PDT 2019


lebedev.ri added a comment.

FWIW, this does not *appear* to be the case on BdVer2, judging by the following `llvm-exegesis --mode=uops` measurements:

  $ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPSmr
  Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-072799.o
  ---
  mode:            uops
  key:
    instructions:
      - 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x0 %noreg XMM6 XMM11'
      - 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x40 %noreg XMM4 XMM9'
      - 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x80 %noreg XMM12 XMM12'
      - 'VMASKMOVPSmr RDI i_0x1 %noreg i_0xc0 %noreg XMM6 XMM2'
      - 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x100 %noreg XMM1 XMM7'
      - 'VMASKMOVPSmr RDI i_0x1 %noreg i_0x140 %noreg XMM10 XMM15'
    config:          ''
    register_initial_values:
      - 'XMM6=0x0'
      - 'XMM11=0x0'
      - 'XMM4=0x0'
      - 'XMM9=0x0'
      - 'XMM12=0x0'
      - 'XMM2=0x0'
      - 'XMM1=0x0'
      - 'XMM7=0x0'
      - 'XMM10=0x0'
      - 'XMM15=0x0'
  cpu_name:        bdver2
  llvm_triple:     x86_64-unknown-linux-gnu
  num_repetitions: 10000
  measurements:
    - { key: PdFPU0, value: 8.0055, per_snippet_value: 48.033 }
    - { key: PdFPU1, value: 4.0124, per_snippet_value: 24.0744 }
    - { key: PdFPU2, value: 2.0042, per_snippet_value: 12.0252 }
    - { key: PdFPU3, value: 4.0078, per_snippet_value: 24.0468 }
    - { key: NumMicroOps, value: 18.0142, per_snippet_value: 108.085 }
  error:           ''
  info:            instruction is parallel, repeating a random one.
  assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C410C462492E1FC462592E4F40C462192EA780000000C4E2492E97C0000000C4E2712EBF00010000C462292EBF40010000C462492E1FC462592E4F40C462192EA780000000C4E2492E97C0000000C4E2712EBF00010000C462292EBF40010000C462492E1FC462592E4F40C462192EA780000000C4E2492E97C0000000C3
  ...
  $ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPDmr
  Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-28613e.o
  ---
  mode:            uops
  key:
    instructions:
      - 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x0 %noreg XMM8 XMM7'
      - 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x40 %noreg XMM14 XMM0'
      - 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x80 %noreg XMM11 XMM5'
      - 'VMASKMOVPDmr RDI i_0x1 %noreg i_0xc0 %noreg XMM4 XMM11'
      - 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x100 %noreg XMM12 XMM11'
      - 'VMASKMOVPDmr RDI i_0x1 %noreg i_0x140 %noreg XMM4 XMM0'
    config:          ''
    register_initial_values:
      - 'XMM8=0x0'
      - 'XMM7=0x0'
      - 'XMM14=0x0'
      - 'XMM0=0x0'
      - 'XMM11=0x0'
      - 'XMM5=0x0'
      - 'XMM4=0x0'
      - 'XMM12=0x0'
  cpu_name:        bdver2
  llvm_triple:     x86_64-unknown-linux-gnu
  num_repetitions: 10000
  measurements:
    - { key: PdFPU0, value: 7.9896, per_snippet_value: 47.9376 }
    - { key: PdFPU1, value: 4.0235, per_snippet_value: 24.141 }
    - { key: PdFPU2, value: 2.0042, per_snippet_value: 12.0252 }
    - { key: PdFPU3, value: 4.0077, per_snippet_value: 24.0462 }
    - { key: NumMicroOps, value: 18.0128, per_snippet_value: 108.077 }
  error:           ''
  info:            instruction is parallel, repeating a random one.
  assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C410C4E2392F3FC4E2092F4740C4E2212FAF80000000C462592F9FC0000000C462192F9F00010000C4E2592F8740010000C4E2392F3FC4E2092F4740C4E2212FAF80000000C462592F9FC0000000C462192F9F00010000C4E2592F8740010000C4E2392F3FC4E2092F4740C4E2212FAF80000000C462592F9FC0000000C3
  ...
  $ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPSYmr
  Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-f26721.o
  ---
  mode:            uops
  key:
    instructions:
      - 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x0 %noreg YMM5 YMM4'
      - 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x40 %noreg YMM2 YMM0'
      - 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x80 %noreg YMM15 YMM14'
      - 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0xc0 %noreg YMM10 YMM13'
      - 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x100 %noreg YMM7 YMM15'
      - 'VMASKMOVPSYmr RDI i_0x1 %noreg i_0x140 %noreg YMM15 YMM5'
    config:          ''
    register_initial_values:
      - 'YMM5=0x0'
      - 'YMM4=0x0'
      - 'YMM2=0x0'
      - 'YMM0=0x0'
      - 'YMM15=0x0'
      - 'YMM14=0x0'
      - 'YMM10=0x0'
      - 'YMM13=0x0'
      - 'YMM7=0x0'
  cpu_name:        bdver2
  llvm_triple:     x86_64-unknown-linux-gnu
  num_repetitions: 10000
  measurements:
    - { key: PdFPU0, value: 15.9929, per_snippet_value: 95.9574 }
    - { key: PdFPU1, value: 8.089, per_snippet_value: 48.534 }
    - { key: PdFPU2, value: 2.0012, per_snippet_value: 12.0072 }
    - { key: PdFPU3, value: 8.0068, per_snippet_value: 48.0408 }
    - { key: NumMicroOps, value: 34.018, per_snippet_value: 204.108 }
  error:           ''
  info:            instruction is parallel, repeating a random one.
  assembled_snippet: 4883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F2C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F24244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F14244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F04244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F3C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F34244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F14244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F2C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F3C244883C420C4E2552E27C4E26D2E4740C462052EB780000000C4622D2EAFC0000000C462452EBF00010000C4E2052EAF40010000C4E2552E27C4E26D2E4740C462052EB780000000C4622D2EAFC0000000C462452EBF00010000C4E2052EAF40010000C4E2552E27C4E26D2E4740C462052EB780000000C4622D2EAFC0000000C3
  ...
  $ ./bin/llvm-exegesis --mode=uops --opcode-name=VMASKMOVPDYmr
  Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-e45324.o
  ---
  mode:            uops
  key:
    instructions:
      - 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x0 %noreg YMM15 YMM5'
      - 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x40 %noreg YMM9 YMM10'
      - 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x80 %noreg YMM10 YMM7'
      - 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0xc0 %noreg YMM1 YMM8'
      - 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x100 %noreg YMM10 YMM10'
      - 'VMASKMOVPDYmr RDI i_0x1 %noreg i_0x140 %noreg YMM13 YMM9'
    config:          ''
    register_initial_values:
      - 'YMM15=0x0'
      - 'YMM5=0x0'
      - 'YMM9=0x0'
      - 'YMM10=0x0'
      - 'YMM7=0x0'
      - 'YMM1=0x0'
      - 'YMM8=0x0'
      - 'YMM13=0x0'
  cpu_name:        bdver2
  llvm_triple:     x86_64-unknown-linux-gnu
  num_repetitions: 10000
  measurements:
    - { key: PdFPU0, value: 16.0013, per_snippet_value: 96.0078 }
    - { key: PdFPU1, value: 8.0093, per_snippet_value: 48.0558 }
    - { key: PdFPU2, value: 2.0018, per_snippet_value: 12.0108 }
    - { key: PdFPU3, value: 8.0068, per_snippet_value: 48.0408 }
    - { key: NumMicroOps, value: 34.0168, per_snippet_value: 204.101 }
  error:           ''
  info:            instruction is parallel, repeating a random one.
  assembled_snippet: 4883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F3C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F2C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F0C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F14244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F3C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C5FE6F0C244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F04244883C4204883EC20C7042400000000C744240400000000C744240800000000C744240C00000000C744241000000000C744241400000000C744241800000000C744241C00000000C57E6F2C244883C420C4E2052F2FC462352F5740C4E22D2FBF80000000C462752F87C0000000C4622D2F9700010000C462152F8F40010000C4E2052F2FC462352F5740C4E22D2FBF80000000C462752F87C0000000C4622D2F9700010000C462152F8F40010000C4E2052F2FC462352F5740C4E22D2FBF80000000C462752F87C0000000C3
  ...


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66801/new/

https://reviews.llvm.org/D66801





More information about the llvm-commits mailing list