[PATCH] D139283: [llvm-exegesis] parallel snippet generator: avoid Read-After-Write pitfall for instrs w/ tied variables
Roman Lebedev via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 4 16:42:50 PST 2022
lebedev.ri created this revision.
lebedev.ri added reviewers: RKSimon, andreadb, courbet, gchatelet.
lebedev.ri added a project: LLVM.
Herald added subscribers: mstojanovic, pengfei.
Herald added a project: All.
lebedev.ri requested review of this revision.
As discussed in https://github.com/llvm/llvm-project/issues/59325,
at least for instructions with tied variables,
when trying to parallelize the instructions,
register selection is rather bad: it may pick, as a use,
a register that has already been picked as a def,
or vice versa.
That introduces serialization, and leads to
an overly pessimistic inverse throughput measurement.
The new implementation avoids that.
New result:
$ ./bin/llvm-exegesis --mode=inverse_throughput --opcode-name=VFMADD132PDr --max-configs-per-opcode=9182
Templates 2
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-1b37a6.o
---
mode: inverse_throughput
key:
instructions:
- 'VFMADD132PDr XMM14 XMM14 XMM7 XMM10'
- 'VFMADD132PDr XMM12 XMM12 XMM1 XMM6'
- 'VFMADD132PDr XMM3 XMM3 XMM8 XMM7'
- 'VFMADD132PDr XMM4 XMM4 XMM6 XMM8'
- 'VFMADD132PDr XMM11 XMM11 XMM9 XMM15'
- 'VFMADD132PDr XMM2 XMM2 XMM1 XMM0'
- 'VFMADD132PDr XMM13 XMM13 XMM6 XMM1'
- 'VFMADD132PDr XMM5 XMM5 XMM9 XMM9'
config: ''
register_initial_values:
- 'XMM14=0x0'
- 'XMM7=0x0'
- 'XMM10=0x0'
- 'MXCSR=0x0'
- 'XMM12=0x0'
- 'XMM1=0x0'
- 'XMM6=0x0'
- 'XMM3=0x0'
- 'XMM8=0x0'
- 'XMM4=0x0'
- 'XMM11=0x0'
- 'XMM9=0x0'
- 'XMM15=0x0'
- 'XMM2=0x0'
- 'XMM0=0x0'
- 'XMM13=0x0'
- 'XMM5=0x0'
cpu_name: znver3
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: inverse_throughput, value: 0.5419, per_snippet_value: 4.3352 }
error: ''
info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other
assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC04C70424801F0000C5F8AE14244883C4044883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C410C442C198F2C462F198E6C4E2B998DFC4C2C998E0C442B198DFC4E2F198D0C462C998E9C4C2B198E9C442C198F2C462F198E6C4E2B998DFC4C2C998E0C442B198DFC4E2F198D0C462C998E9C4C2B198E9C442C198F2C462F198E6C4E2B998DFC4C2C998E0C442B198DFC4E2F198D0C462C998E9C4C2B198E9C442C198F2C462F198E6C4E2B998DFC4C2C998E0C442B198DFC4E2F198D0C462C998E9C4C2B198E9C3
...
Check generated assembly with: /usr/bin/objdump -d /tmp/snippet-0df694.o
---
mode: inverse_throughput
key:
instructions:
- 'VFMADD132PDr XMM5 XMM5 XMM14 XMM14'
- 'VFMADD132PDr XMM13 XMM13 XMM14 XMM14'
- 'VFMADD132PDr XMM12 XMM12 XMM14 XMM14'
- 'VFMADD132PDr XMM11 XMM11 XMM14 XMM14'
- 'VFMADD132PDr XMM6 XMM6 XMM14 XMM14'
- 'VFMADD132PDr XMM10 XMM10 XMM14 XMM14'
- 'VFMADD132PDr XMM4 XMM4 XMM14 XMM14'
- 'VFMADD132PDr XMM9 XMM9 XMM14 XMM14'
- 'VFMADD132PDr XMM1 XMM1 XMM14 XMM14'
- 'VFMADD132PDr XMM8 XMM8 XMM14 XMM14'
- 'VFMADD132PDr XMM15 XMM15 XMM14 XMM14'
- 'VFMADD132PDr XMM3 XMM3 XMM14 XMM14'
- 'VFMADD132PDr XMM7 XMM7 XMM14 XMM14'
- 'VFMADD132PDr XMM0 XMM0 XMM14 XMM14'
- 'VFMADD132PDr XMM2 XMM2 XMM14 XMM14'
config: ''
register_initial_values:
- 'XMM5=0x0'
- 'XMM14=0x0'
- 'MXCSR=0x0'
- 'XMM13=0x0'
- 'XMM12=0x0'
- 'XMM11=0x0'
- 'XMM6=0x0'
- 'XMM10=0x0'
- 'XMM4=0x0'
- 'XMM9=0x0'
- 'XMM1=0x0'
- 'XMM8=0x0'
- 'XMM15=0x0'
- 'XMM3=0x0'
- 'XMM7=0x0'
- 'XMM0=0x0'
- 'XMM2=0x0'
cpu_name: znver3
llvm_triple: x86_64-unknown-linux-gnu
num_repetitions: 10000
measurements:
- { key: inverse_throughput, value: 0.5314, per_snippet_value: 7.971 }
error: ''
info: instruction has tied variables, avoiding Read-After-Write issue, picking random def registers not aliasing a single random use register
assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC04C70424801F0000C5F8AE14244883C4044883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C410C4C28998EEC4428998EEC4428998E6C4428998DEC4C28998F6C4428998D6C4C28998E6C4428998CEC4C28998CEC4428998C6C4428998FEC4C28998DEC4C28998FEC4C28998C6C4C28998D6C4C28998EEC4428998EEC4428998E6C4428998DEC4C28998F6C4428998D6C4C28998E6C4428998CEC4C28998CEC4428998C6C4428998FEC4C28998DEC4C28998FEC4C28998C6C4C28998D6C4C28998EEC4428998EEC4428998E6C4428998DEC4C28998F6C4428998D6C4C28998E6C4428998CEC4C28998CEC4428998C6C4428998FEC4C28998DEC4C28998FEC4C28998C6C4C28998D6C4C28998EEC4428998EEC4428998E6C4428998DEC4C28998F6C4428998D6C4C28998E6C4428998CEC4C
28998CEC4428998C6C4428998FEC4C28998DEC4C28998FEC4C28998C6C4C28998D6C3
...
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D139283
Files:
llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp
llvm/tools/llvm-exegesis/lib/CodeTemplate.h
llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp
llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D139283.479950.patch
Type: text/x-patch
Size: 14268 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221205/d1a76e1a/attachment.bin>
More information about the llvm-commits mailing list