<!--
  Issue metadata: one row per field, with the field name as a row header
  and its value in the adjacent cell.
  NOTE(review): the presentational border/cellspacing/cellpadding attributes
  are kept on purpose; no stylesheet is visible here, so dropping them would
  change how the table renders.
-->
<table border="1" cellspacing="0" cellpadding="8">
  <tr>
    <th scope="row">Issue</th>
    <td>
      <a href="https://github.com/llvm/llvm-project/issues/79794">79794</a>
    </td>
  </tr>

  <tr>
    <th scope="row">Summary</th>
    <td>
      [x86] bitReverse of a byte or half-word can probably be optimized
    </td>
  </tr>

  <tr>
    <th scope="row">Labels</th>
    <td>
      new issue
    </td>
  </tr>

  <tr>
    <th scope="row">Assignees</th>
    <!-- Intentionally empty: no assignee is listed for this issue. -->
    <td>
    </td>
  </tr>

  <tr>
    <th scope="row">Reporter</th>
    <td>
      Validark
    </td>
  </tr>
</table>

<pre>
    [Godbolt link](https://zig.godbolt.org/z/f6PTsr3nj)

```zig
const std = @import("std");

/// Thin wrapper around the x86 BMI2 `pext` (parallel bit extract) instruction.
/// Gathers the bits of `src` that are selected by the set bits of `mask` and
/// packs them contiguously into the low bits of the returned u64.
inline fn pext(src: u64, mask: u64) u64 {
 // Operands in the template are written as mask, source, destination
 // (the output register comes last).
 return asm ("pext %[mask], %[src], %[ret]"
        : [ret] "=r" (-> u64),
        : [src] "r" (src),
          [mask] "r" (mask),
 );
}

const T = u8;

/// Baseline implementation: reverses the bits of `x` with the `@bitReverse`
/// builtin. The T-sized result is widened to the u64 return type.
export fn byteReverseDefault(x: T) u64 {
    return @bitReverse(x);
}

/// Reverses the bits of a byte with one multiply followed by `pext`.
/// Multiplying by 0x0101010101010101 replicates `x` into all eight bytes of a
/// u64. The mask is a vector of eight bytes holding 1 shifted left by
/// 7, 6, ..., 0, reinterpreted as a u64 (it appears as the constant
/// 72624976668147840 in the compiled output), so `pext` picks a different bit
/// of `x` from each replicated byte.
/// Note: takes u8 rather than T because the multiplier and mask are fixed to
/// eight 8-bit lanes.
export fn byteReversePext(x: u8) u64 {
    return pext(@as(u64, x) * 0x0101010101010101, @as(u64, @bitCast(@as(@Vector(8, u8), @splat(1)) << ~std.simd.iota(u3, 8))));
}

/// Reverses the bits of `x` with a vector AND plus compare.
/// `x` is splatted across @bitSizeOf(T) lanes and every lane is tested against
/// its own single-bit mask; the resulting vector of bools is bit-cast back
/// to T, one bit per lane.
export fn byteReverseVector(x: T) T {
    // One copy of x per bit of T.
    const vec = @as(@Vector(@bitSizeOf(T), T), @splat(x));
    // Lane i holds 1 shifted left by ~i, i.e. the highest bit for lane 0 down
    // to the lowest bit for the last lane.
    const mask = @as(@Vector(@bitSizeOf(T), T), @splat(1)) << ~std.simd.iota(std.math.Log2Int(T), @bitSizeOf(T));
    // A lane compares equal exactly when its selected bit is set in x.
    // Presumably lane 0 lands in the least significant bit of the result,
    // matching the vpmovmskb in the quoted assembly -- confirm.
    return @bitCast((vec & mask) == mask);
}
```

```asm
; Compiled output for byteReverseDefault: reverses the 8 bits in dil using
; three swap steps (nibbles, then 2-bit pairs, then single bits).
byteReverseDefault:
        ; Step 1: rotating a byte by 4 swaps its high and low nibbles.
        rol dil, 4
        ; Step 2: swap adjacent 2-bit pairs (51 = 0x33 = 0b00110011).
        mov     eax, edi
        shr     dil, 2
        and al, 51
        and     dil, 51
        shl     al, 2
        or dil, al
        ; Step 3: swap adjacent bits (85 = 0x55 = 0b01010101);
        ; "add al, al" doubles al, i.e. shifts it left by one.
        mov     eax, edi
        shr     dil
        and al, 85
        and     dil, 85
        add     al, al
        or dil, al
        ; Zero-extend the reversed byte in dil into eax before returning.
        movzx   eax, dil
 ret

; Compiled output for byteReversePext: one multiply plus one pext.
byteReversePext:
        ; 72340172838076673 = 0x0101010101010101, the byte-replicating multiplier.
        movabs  rcx, 72340172838076673
 mov     eax, edi
        ; rcx = multiplier * argument.
        imul    rcx, rax
        ; 72624976668147840 = 0x0102040810204080: one selected bit per byte,
        ; from bit 7 in the lowest byte down to bit 0 in the highest byte.
        movabs  rax, 72624976668147840
        ; Gather the selected bits of rcx into the low bits of rax.
        pext    rax, rcx, rax
 ret

; Constant used by byteReverseVector: eight single-bit byte masks
; (128 down to 1) followed by eight zero bytes.
.LCPI2_0:
        .byte   128
        .byte   64
 .byte   32
        .byte   16
        .byte   8
        .byte 4
        .byte   2
        .byte   1
        .zero   1
 .zero   1
        .zero   1
        .zero   1
        .zero   1
 .zero   1
        .zero   1
        .zero 1
byteReverseVector: ; the compiler will not emit this for you on godbolt
        ; Load the first 8 bytes of .LCPI2_0 and broadcast that qword across xmm1.
        vpbroadcastq    xmm1, qword ptr [rip + .LCPI2_0]
 vmovd   xmm0, edi
        ; Broadcast the low byte of the argument to every byte lane of xmm0.
        vpbroadcastb    xmm0, xmm0
        ; Keep only the masked bit in each lane, then compare with the mask:
        ; lanes whose selected bit was set become all ones.
        vpand xmm0, xmm0, xmm1
        vpcmpeqb        xmm0, xmm0, xmm1
        ; Collect the top bit of every byte lane into eax.
 vpmovmskb       eax, xmm0
        ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysVk2P2zgP_jXKhZhAlvyVQw4zyZsXBQpssRv0upBtJVFHtlxJTjM57G9fSLYntuNM0WLbYASRD8mHpCSTGSOOFedrFL2gaLtgjT0pvf7KpCiYfl1kqnhzuv-rIlPSghTVK4q2iKQna2uD6DMiO0R2V3FcHlvMUumjkyCyO8Rf9kbT6hsiK4S3CD93f2Pc_q7i2EpyVRkLxhaA6BZQiEVZK20RSREhxhaIEOeDvgzdiEqKisOhgppfHNboHNFnaOIQkQ2UzLy-b1duAZR0DkBz2-gKmCmhDeJcACIRil68oUty0wmc3-Fec-v3pHPW_XPB3rXg1HSrESEuwhOi_-uYILKZtWujOLveyElm8AA3kkO4Fw3xo5Il22Ht2oLvfbmbdFJYfnHFd4XN3iz_k5-5NnzLD6yRrswXR3h_X1OAvqwoxJmwnaG3eMxkNtiXtqE-UpN-FKprPQoxM4ikXe9dQEDkGfAFB-P_vo1jdEt3w8zQEwrxV55bpRFJU4fyPDq4qSVz4MCLVoDoBtEN_GNssTSiLJZCWeYCUGfQGva_X6nDO4NbzffjMrSNPPO8vzl35Nvs_hJX_scBkXTfZbG_z-YyYXjz747WfxDgZ-Vy-5LZ0_KzOpJPlR14mw0ypjo6fH03SeprQ2LorodLw2XS35b7dvSv0-yTxUzZSmauBn0eX1StJBRCOvrhWFOqs185uzgtL8RYb07ar5315KFhVQHMK6LgXjOwm6rNSbawOa9K92ZM_j7bB0TT6EOid-qiGBCd8vmQ6fVyY3oj5B7lQTunT820c6U6s8wA6Nz7SQgNcZCQlKY4ieOEduifFUaUjS9450azy4MwrAsTk3CVxHGcBmGShniM9p8oeEffOZ3kuPy8-fKJ_I3vklu67AEgIOm8Iu5Pay-g5IGHeF4-6zecxz5yPRFfuVYD8XT_APZr4t-xDu4OVPcuuo86fQF74pCrshaSa_ghpIRKWTiKM4c31YA9CQO8FBZEBd0ENQ5zrjOtWJEzY7-7_aUs_Tfs-w-lC6it9jOHqAGRF3jvedQ9ZnAu1blozfDsGR34z2AA9OsE6W7tSN-uwRSXlzX_nvX7DyzOdanOpXntsd1Fuo99O9v9Q7wo1rRY0RVb8HWQ4DgKojhKFqc1DoMwTg5RHiYJTbIDp6uQ57hIA8ppkeGFWBNMQhyQFU5xQpNlUuQxjQ44Xx14URCMQsxLJuRSynPp5tmFMKbh62SVrMKFZBmXxk_MhFT8B3ilG_ei7UKvnc1T1hwNCrEUxpqbFyus9KP2JY3d7HabkUAdgPkPv3vcTkwennx7c1ZBrVXGMvkGGQdVW1GKKy8WjZbr8Qx-FPbUZMtclYjsXMxueaq1-sZzi8jOMzWI7Hwm_wYAAP__XiUtgA">