<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - [MIPS] All word-sized multiplies against constants are transformed into shift + add"

   href="https://bugs.llvm.org/show_bug.cgi?id=41929">41929</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>[MIPS] All word-sized multiplies against constants are transformed into shift + add

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Windows NT

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: MIPS

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>husseydevin@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre><a href="https://github.com/llvm-mirror/llvm/blob/36cbe77a6eee47bfaebd17a713decac4433d6ae8/lib/Target/Mips/MipsSEISelLowering.cpp#L718">https://github.com/llvm-mirror/llvm/blob/36cbe77a6eee47bfaebd17a713decac4433d6ae8/lib/Target/Mips/MipsSEISelLowering.cpp#L718</a>

I've tested this code and the logic seems fishy. From the looks of it, this

always returns true for integers that match the word size.

For example, take this code.

unsigned multiply(unsigned x)

{

    return x * 0xAAAAAAAA; // 0b10101010101010101010101010101010

}

Clearly, with alternating 1's and 0's, turning this into a shift and add is

ridiculous. Unless you're Clang.

clang -O3 --target=mipsel-linux-gnu -S multiply.c

multiply:

        sll     $1, $4, 1

        sll     $2, $4, 3

        sll     $3, $4, 29

        sll     $5, $4, 27

        sll     $6, $4, 25

        sll     $7, $4, 23

        sll     $8, $4, 21

        sll     $9, $4, 19

        sll     $10, $4, 17

        addu    $1, $2, $1

        sll     $2, $4, 5

        addu    $1, $2, $1

        sll     $2, $4, 7

        addu    $1, $2, $1

        sll     $2, $4, 9

        addu    $1, $2, $1

        sll     $2, $4, 11

        addu    $1, $2, $1

        sll     $2, $4, 13

        addu    $1, $2, $1

        sll     $2, $4, 31

        sll     $4, $4, 15

        addu    $1, $4, $1

        addu    $1, $10, $1

        addu    $1, $9, $1

        addu    $1, $8, $1

        addu    $1, $7, $1

        addu    $1, $6, $1

        addu    $1, $5, $1

        addu    $1, $3, $1

        jr      $ra

        addu    $2, $2, $1

According to the code, it should stop at 8 steps. However, that is 31 steps,

far more than the latency of multiplication unless I am reading the

documentation incorrectly.

I would expect something like this:

unsigned multiply(unsigned x)

{

    unsigned a = 0xAAAAAAAA;

    __asm__("" : "+r" (a)); // disable constant propagation

    return x * a;

}

multiply:

        lui     $1, 43690

        ori     $2, $1, 43690

        jr      $ra

        mul     $2, $2, $4

It is even worse on mips64:

unsigned long long multiply64(unsigned long long x)

{

    return x * 0xAAAAAAAAAAAAAAAAULL;

}

clang -O3 -S --target=mips64el-linux-gnu multiply.c

multiply64:                             # @multiply64

        daddiu  $sp, $sp, -80

        sd      $gp, 72($sp)            # 8-byte Folded Spill

        sd      $23, 64($sp)            # 8-byte Folded Spill

        sd      $22, 56($sp)            # 8-byte Folded Spill

        sd      $21, 48($sp)            # 8-byte Folded Spill

        sd      $20, 40($sp)            # 8-byte Folded Spill

        sd      $19, 32($sp)            # 8-byte Folded Spill

        sd      $18, 24($sp)            # 8-byte Folded Spill

        sd      $17, 16($sp)            # 8-byte Folded Spill

        sd      $16, 8($sp)             # 8-byte Folded Spill

        dsll    $1, $4, 1

        dsll    $2, $4, 3

        dsll    $3, $4, 61

        dsll    $5, $4, 59

        dsll    $6, $4, 57

        dsll    $7, $4, 55

        dsll    $8, $4, 53

        dsll    $9, $4, 51

        dsll    $10, $4, 49

        dsll    $11, $4, 47

        dsll    $12, $4, 45

        dsll    $13, $4, 43

        dsll    $14, $4, 41

        dsll    $15, $4, 39

        dsll    $24, $4, 37

        dsll    $25, $4, 35

        dsll    $16, $4, 33

        dsll    $17, $4, 31

        dsll    $18, $4, 29

        dsll    $19, $4, 27

        dsll    $20, $4, 25

        dsll    $21, $4, 23

        dsll    $22, $4, 21

        dsll    $23, $4, 19

        dsll    $gp, $4, 17

        daddu   $1, $2, $1

        dsll    $2, $4, 5

        daddu   $1, $2, $1

        dsll    $2, $4, 7

        daddu   $1, $2, $1

        dsll    $2, $4, 9

        daddu   $1, $2, $1

        dsll    $2, $4, 11

        daddu   $1, $2, $1

        dsll    $2, $4, 13

        daddu   $1, $2, $1

        dsll    $2, $4, 63

        dsll    $4, $4, 15

        daddu   $1, $4, $1

        daddu   $1, $gp, $1

        ld      $gp, 72($sp)            # 8-byte Folded Reload

        daddu   $1, $23, $1

        ld      $23, 64($sp)            # 8-byte Folded Reload

        daddu   $1, $22, $1

        ld      $22, 56($sp)            # 8-byte Folded Reload

        daddu   $1, $21, $1

        ld      $21, 48($sp)            # 8-byte Folded Reload

        daddu   $1, $20, $1

        ld      $20, 40($sp)            # 8-byte Folded Reload

        daddu   $1, $19, $1

        ld      $19, 32($sp)            # 8-byte Folded Reload

        daddu   $1, $18, $1

        ld      $18, 24($sp)            # 8-byte Folded Reload

        daddu   $1, $17, $1

        ld      $17, 16($sp)            # 8-byte Folded Reload

        daddu   $1, $16, $1

        ld      $16, 8($sp)             # 8-byte Folded Reload

        daddu   $1, $25, $1

        daddu   $1, $24, $1

        daddu   $1, $15, $1

        daddu   $1, $14, $1

        daddu   $1, $13, $1

        daddu   $1, $12, $1

        daddu   $1, $11, $1

        daddu   $1, $10, $1

        daddu   $1, $9, $1

        daddu   $1, $8, $1

        daddu   $1, $7, $1

        daddu   $1, $6, $1

        daddu   $1, $5, $1

        daddu   $1, $3, $1

        daddu   $2, $2, $1

        jr      $ra

        daddiu  $sp, $sp, 80

This can have pretty nasty effects on binary size: 

xxhash's code (<a href="https://github.com/Cyan4973/xxHash">https://github.com/Cyan4973/xxHash</a>), which uses plenty of

multiplies against constants, results in a binary that is more than 2x larger

than GCC 8 with size optimizations.

mipsel-linux-gnu-gcc-8 -Os -c xxhash.c

stat -c "%s" xxhash.o

30736

clang-9 -Oz -c --target=mipsel-linux-gnu xxhash.c

stat -c "%s" xxhash.o

70080

A look at the assembly will show that every multiply is converted to shifts,

and the only multu instructions are for when we need the high bits of the

result (i.e. long multiply).</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>