[PATCH] D55263: [CodeGen][ExpandMemcmp] Add an option for allowing overlapping loads.
Clement Courbet via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 5 07:48:19 PST 2018
courbet added a comment.
Here's a basic benchmark for `memcmp(a, b, N)` where N is a compile-time constant, and a and b differ first at character M:
F7651979: D55263.cc <https://reviews.llvm.org/F7651979>
The change makes the impacted values of N roughly **2.5 - 3x as fast** (the measured speedups in the table below range from about 2.4x to about 3.6x).
| "BM_Cmp<N, M>" | base | this change | speedup |
| "BM_Cmp<0, -1>" | 0.293 | 0.292 | 1.003424658 |
| "BM_Cmp<1, -1>" | 0.64 | 0.64 | 1 |
| "BM_Cmp<2, -1>" | 0.637 | 0.636 | 1.001572327 |
| "BM_Cmp<3, -1>" | 1.08 | 1.08 | 1 |
| "BM_Cmp<4, -1>" | 0.637 | 0.637 | 1 |
| "BM_Cmp<5, -1>" | 1.08 | 1.08 | 1 |
| "BM_Cmp<6, -1>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<7, -1>" | 2.82 | 1.03 | 2.737864078 |
| "BM_Cmp<8, -1>" | 0.637 | 0.637 | 1 |
| "BM_Cmp<9, -1>" | 1.08 | 1.08 | 1 |
| "BM_Cmp<10, -1>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<11, -1>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<12, -1>" | 1.03 | 1.03 | 1 |
| "BM_Cmp<13, -1>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<14, -1>" | 3.09 | 1.03 | 3 |
| "BM_Cmp<15, -1>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<16, -1>" | 0.843 | 0.844 | 0.9988151659 |
| "BM_Cmp<17, -1>" | 1.33 | 1.33 | 1 |
| "BM_Cmp<18, -1>" | 1.33 | 1.33 | 1 |
| "BM_Cmp<19, -1>" | 3.36 | 1.26 | 2.666666667 |
| "BM_Cmp<20, -1>" | 1.21 | 1.21 | 1 |
| "BM_Cmp<21, -1>" | 3.07 | 1.18 | 2.601694915 |
| "BM_Cmp<22, -1>" | 3.07 | 1.26 | 2.436507937 |
| "BM_Cmp<23, -1>" | 3.07 | 1.26 | 2.436507937 |
| "BM_Cmp<24, -1>" | 1.21 | 1.21 | 1 |
| "BM_Cmp<25, -1>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<26, -1>" | 3.63 | 1.26 | 2.880952381 |
| "BM_Cmp<27, -1>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<28, -1>" | 3.07 | 1.26 | 2.436507937 |
| "BM_Cmp<29, -1>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<30, -1>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<31, -1>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<32, -1>" | 1.26 | 1.25 | 1.008 |
| "BM_Cmp<0, 0>" | 0.286 | 0.285 | 1.003508772 |
| "BM_Cmp<1, 0>" | 0.635 | 0.635 | 1 |
| "BM_Cmp<2, 0>" | 0.634 | 0.633 | 1.001579779 |
| "BM_Cmp<3, 0>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<4, 0>" | 0.641 | 0.634 | 1.011041009 |
| "BM_Cmp<5, 0>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<6, 0>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<7, 0>" | 2.79 | 1.03 | 2.708737864 |
| "BM_Cmp<8, 0>" | 0.633 | 0.632 | 1.001582278 |
| "BM_Cmp<9, 0>" | 1.07 | 1.08 | 0.9907407407 |
| "BM_Cmp<10, 0>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<11, 0>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<12, 0>" | 1.04 | 1.03 | 1.009708738 |
| "BM_Cmp<13, 0>" | 3.1 | 1.03 | 3.009708738 |
| "BM_Cmp<14, 0>" | 3.09 | 1.03 | 3 |
| "BM_Cmp<15, 0>" | 3.09 | 1.03 | 3 |
| "BM_Cmp<16, 0>" | 0.844 | 0.843 | 1.00118624 |
| "BM_Cmp<17, 0>" | 1.33 | 1.32 | 1.007575758 |
| "BM_Cmp<18, 0>" | 1.33 | 1.32 | 1.007575758 |
| "BM_Cmp<19, 0>" | 3.37 | 1.26 | 2.674603175 |
| "BM_Cmp<20, 0>" | 1.22 | 1.21 | 1.008264463 |
| "BM_Cmp<21, 0>" | 3.09 | 1.26 | 2.452380952 |
| "BM_Cmp<22, 0>" | 3.08 | 1.26 | 2.444444444 |
| "BM_Cmp<23, 0>" | 3.07 | 1.26 | 2.436507937 |
| "BM_Cmp<24, 0>" | 1.21 | 1.21 | 1 |
| "BM_Cmp<25, 0>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<26, 0>" | 3.63 | 1.27 | 2.858267717 |
| "BM_Cmp<27, 0>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<28, 0>" | 3.07 | 1.26 | 2.436507937 |
| "BM_Cmp<29, 0>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<30, 0>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<31, 0>" | 3.36 | 1.26 | 2.666666667 |
| "BM_Cmp<32, 0>" | 1.26 | 1.26 | 1 |
| "BM_Cmp<0, 7>" | 0.289 | 0.287 | 1.006968641 |
| "BM_Cmp<1, 7>" | 0.64 | 0.635 | 1.007874016 |
| "BM_Cmp<2, 7>" | 0.638 | 0.633 | 1.007898894 |
| "BM_Cmp<3, 7>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<4, 7>" | 0.634 | 0.635 | 0.9984251969 |
| "BM_Cmp<5, 7>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<6, 7>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<7, 7>" | 2.81 | 1.03 | 2.72815534 |
| "BM_Cmp<8, 7>" | 0.637 | 0.632 | 1.007911392 |
| "BM_Cmp<9, 7>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<10, 7>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<11, 7>" | 3.37 | 1.03 | 3.27184466 |
| "BM_Cmp<12, 7>" | 1.03 | 1.03 | 1 |
| "BM_Cmp<13, 7>" | 3.64 | 1.03 | 3.533980583 |
| "BM_Cmp<14, 7>" | 3.36 | 1.03 | 3.262135922 |
| "BM_Cmp<15, 7>" | 3.63 | 1.03 | 3.524271845 |
| "BM_Cmp<16, 7>" | 0.842 | 0.844 | 0.9976303318 |
| "BM_Cmp<17, 7>" | 1.33 | 1.33 | 1 |
| "BM_Cmp<18, 7>" | 1.33 | 1.33 | 1 |
| "BM_Cmp<19, 7>" | 3.63 | 1.26 | 2.880952381 |
| "BM_Cmp<20, 7>" | 1.21 | 1.21 | 1 |
| "BM_Cmp<21, 7>" | 3.93 | 1.26 | 3.119047619 |
| "BM_Cmp<22, 7>" | 3.9 | 1.26 | 3.095238095 |
| "BM_Cmp<23, 7>" | 3.93 | 1.25 | 3.144 |
| "BM_Cmp<24, 7>" | 1.22 | 1.21 | 1.008264463 |
| "BM_Cmp<25, 7>" | 3.92 | 1.26 | 3.111111111 |
| "BM_Cmp<26, 7>" | 3.63 | 1.26 | 2.880952381 |
| "BM_Cmp<27, 7>" | 3.92 | 1.26 | 3.111111111 |
| "BM_Cmp<28, 7>" | 3.63 | 1.26 | 2.880952381 |
| "BM_Cmp<29, 7>" | 3.93 | 1.26 | 3.119047619 |
| "BM_Cmp<30, 7>" | 3.93 | 1.26 | 3.119047619 |
| "BM_Cmp<31, 7>" | 3.93 | 1.26 | 3.119047619 |
| "BM_Cmp<32, 7>" | 1.26 | 1.26 | 1 |
| "BM_Cmp<0, 15>" | 0.287 | 0.287 | 1 |
| "BM_Cmp<1, 15>" | 0.637 | 0.635 | 1.003149606 |
| "BM_Cmp<2, 15>" | 0.633 | 0.631 | 1.003169572 |
| "BM_Cmp<3, 15>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<4, 15>" | 0.634 | 0.633 | 1.001579779 |
| "BM_Cmp<5, 15>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<6, 15>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<7, 15>" | 2.79 | 1.03 | 2.708737864 |
| "BM_Cmp<8, 15>" | 0.635 | 0.64 | 0.9921875 |
| "BM_Cmp<9, 15>" | 1.07 | 1.08 | 0.9907407407 |
| "BM_Cmp<10, 15>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<11, 15>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<12, 15>" | 1.03 | 1.03 | 1 |
| "BM_Cmp<13, 15>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<14, 15>" | 3.09 | 1.03 | 3 |
| "BM_Cmp<15, 15>" | 3.09 | 1.03 | 3 |
| "BM_Cmp<16, 15>" | 0.842 | 0.844 | 0.9976303318 |
| "BM_Cmp<17, 15>" | 1.33 | 1.33 | 1 |
| "BM_Cmp<18, 15>" | 1.32 | 1.33 | 0.992481203 |
| "BM_Cmp<19, 15>" | 3.63 | 1.26 | 2.880952381 |
| "BM_Cmp<20, 15>" | 1.21 | 1.21 | 1 |
| "BM_Cmp<21, 15>" | 3.91 | 1.26 | 3.103174603 |
| "BM_Cmp<22, 15>" | 3.92 | 1.26 | 3.111111111 |
| "BM_Cmp<23, 15>" | 3.94 | 1.26 | 3.126984127 |
| "BM_Cmp<24, 15>" | 1.22 | 1.21 | 1.008264463 |
| "BM_Cmp<25, 15>" | 3.91 | 1.26 | 3.103174603 |
| "BM_Cmp<26, 15>" | 3.63 | 1.26 | 2.880952381 |
| "BM_Cmp<27, 15>" | 3.92 | 1.26 | 3.111111111 |
| "BM_Cmp<28, 15>" | 3.65 | 1.26 | 2.896825397 |
| "BM_Cmp<29, 15>" | 3.93 | 1.25 | 3.144 |
| "BM_Cmp<30, 15>" | 3.93 | 1.26 | 3.119047619 |
| "BM_Cmp<31, 15>" | 3.92 | 1.26 | 3.111111111 |
| "BM_Cmp<32, 15>" | 1.26 | 1.26 | 1 |
| "BM_Cmp<0, 24>" | 0.285 | 0.286 | 0.9965034965 |
| "BM_Cmp<1, 24>" | 0.639 | 0.638 | 1.001567398 |
| "BM_Cmp<2, 24>" | 0.634 | 0.633 | 1.001579779 |
| "BM_Cmp<3, 24>" | 1.07 | 1.07 | 1 |
| "BM_Cmp<4, 24>" | 0.636 | 0.633 | 1.004739336 |
| "BM_Cmp<5, 24>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<6, 24>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<7, 24>" | 2.8 | 1.03 | 2.718446602 |
| "BM_Cmp<8, 24>" | 0.633 | 0.635 | 0.9968503937 |
| "BM_Cmp<9, 24>" | 1.07 | 1.08 | 0.9907407407 |
| "BM_Cmp<10, 24>" | 1.08 | 1.07 | 1.009345794 |
| "BM_Cmp<11, 24>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<12, 24>" | 1.03 | 1.03 | 1 |
| "BM_Cmp<13, 24>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<14, 24>" | 3.08 | 1.03 | 2.990291262 |
| "BM_Cmp<15, 24>" | 3.09 | 1.03 | 3 |
| "BM_Cmp<16, 24>" | 0.844 | 0.843 | 1.00118624 |
| "BM_Cmp<17, 24>" | 1.33 | 1.33 | 1 |
| "BM_Cmp<18, 24>" | 1.33 | 1.32 | 1.007575758 |
| "BM_Cmp<19, 24>" | 3.37 | 1.26 | 2.674603175 |
| "BM_Cmp<20, 24>" | 1.21 | 1.21 | 1 |
| "BM_Cmp<21, 24>" | 3.08 | 1.26 | 2.444444444 |
| "BM_Cmp<22, 24>" | 3.07 | 1.26 | 2.436507937 |
| "BM_Cmp<23, 24>" | 3.07 | 1.26 | 2.436507937 |
| "BM_Cmp<24, 24>" | 1.21 | 1.21 | 1 |
| "BM_Cmp<25, 24>" | 3.35 | 1.26 | 2.658730159 |
| "BM_Cmp<26, 24>" | 3.63 | 1.26 | 2.880952381 |
| "BM_Cmp<27, 24>" | 4.21 | 1.26 | 3.341269841 |
| "BM_Cmp<28, 24>" | 3.94 | 1.26 | 3.126984127 |
| "BM_Cmp<29, 24>" | 4.2 | 1.26 | 3.333333333 |
| "BM_Cmp<30, 24>" | 4.2 | 1.26 | 3.333333333 |
| "BM_Cmp<31, 24>" | 4.48 | 1.26 | 3.555555556 |
| "BM_Cmp<32, 24>" | 1.27 | 1.26 | 1.007936508 |
|
Repository:
rL LLVM
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D55263/new/
https://reviews.llvm.org/D55263
More information about the llvm-commits
mailing list