<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=http://email.email.llvm.org/c/eJy1WEuTozYQ_jX4orILBPhx8GHG3kn2kOzW7FZt5eSSQdjKCokgMbbn16cl3vgxntnEZUAS6tfX6m6JrYxPy73WmXL8Bwc_wX8n463keiLzHfRe4frL__pP_oMeieOuHfehuuP5n1JTIENEIUWpQEwgvaeoYoA4Ez8dvELpCTGlCopkznZMEM7NgJn9XChtZmwLjZg2TFKFtEQky2AONFYOfoS_kXCgnE8cvOjq8H0PfCIZU5RLTTRVVj4nAlpExCBC0VxDGwl6QAnLlUaU05QKPan4TN3yX0mqbfOZiHgBjB1_dZxPmdA5E5O943_qyt9sUhxOa5YbtWcJ2DOvho_GtEgKEJpwSTRy8IOdsmECDEHOrBKH4FfRnEDgGm1S09lkNE8LTTfgHDy33NzjwjcY-OeUXB66tKXpiQfaWPJ2vqUpJ0VEgefLOdA3qqfpBnSNN8oQdbS1_9VFLvSocxJ1ZJ2sqpcovLY7NCOnushFzXPLqYg73MC60n7P6xI6s_XAj133rGAl7JCxo9jKTLOUcEv8MKAhKu2r-WKQZzxTVf-Ypp4RD0_XPL0gqKcCe9--t-DbCeGjmWQuQ4SdcD3g3gGs4o4N11PNfTDdYtHo0ihRk9X9Oer-KrXcWi1cqdWohFeVsv65gufmu33zZ1fllBxvmt8uTduvzT4NQR6QpfJFqdYX8UHmMcp0jkBiHjOQc25-6ZWUptZ6a_UrzeXgcRPujno9Nb0zuE81DHZKI_BUglKCgVcGnACuEC6IuNWsAejGSv5tBfnp6fNqZVYzPWY00jSGkFEF1x9b0kOzwKe38V7_-PK8Rl-_P7d4v-lX75JfIYY_iPjsBkK2FHx-Rjv2UpWBAUzosIcSlRGlYAhKC9BzHhkuZ-BVXf8R_SHjgtPPa-tWB89sQvTxcT4JEi-KklkwjnbFBNSsdFOyyCO6SRiYRFJa0eHrdLik0yTfUY1iogknJwn1sKKk4xQUpOMMz4ymPrY30_X6XdOaBvY2ZnUjmRsi8MhYzE1jWtPAu28wPJQPVS7jjdZQ-DYwsRA_hTyIMdTz4jjeiaKhaoF6KkSkmRToQevc7CVQCrU9y-UO0FdIyCSnFB7qJCJ4FOLAoD4LKTh5PW1NuzhoAgsBHRjnZS0oucc0YcLW4Tk6loUUqjByAndYds-m4NAWzXIAP5iBbv0thHFRvCFxnJsAdtuKrAAP3SwMCG8wsYkgx11ciJ6rqeA8I5qg7yaBNpJAw5JO7Ysk4fQFVrDMz42vLBuOZ5IpKTpvmHH2J9ODFjJamCHk1g2vbuC6MasbQd0I68Z0sPupYLF5AjD5_xIzmFvONzuT2p3X_LpChLOdAP1bamypy_xUrRqDSXAZu3qzFnotXudmt9muXA8Xc53XUcK_7Nhg4Fhcue-Kchcde8Ofjc8LAZF0b6d2tFU8uG9Fum-uyOB9iveUCO9DL_gAeu9C5u0IGig-vQ89_xJ6xu5fcrrXDHntWBPZXng5oqt6fCHPtaX4o5ufetTMCXpzzjdEFr_ZffhNr-Dnvo3fosGjhbTBNHgngFC6LqlRb12a84q9EyiWDA6_sGMp64-pu7PH_6B2mvJd1vRxlNmC7a_Lij6GvQHGnfcJJUBBVTMJjsHk5Wgn3Txe_S4P4JDcgGFPV-yVwXELzvHlQfvA9N7ssuAdGn_xoFXtxOzRP2ZJQnOTiNvDmcnQOi_sRkLd2NPCwuwW4-HpzCSLi-Wmczxpr3b3vbia0-vTj9fJ6WZ-p_Thu0sfvr_0GRm9YOwVmVMl9eJJxKuDEfeC0bvzJLJ4KUQW_eRZfFmwe03ceez3hePeO9wTaYJ8mHZ6LvCgkr-VdprjbS_19NNOtekapJ5wuNhH8dKPF_6CjEih9zJfZnvGWRaNipwPP5rBYi-2k0im0OH8pX6MIYz_hqQFXfsNDKLsKfQ9HIz2y-l8QcMwJt
TbJjNYc54XzqaJ69FFOCX-fD7iZEu5WoKCEIzmC5ZlYSI0XI9-XQO2xC7Grud57iL08XRCtm68mEdhHLhbjBeu2WOnhPGJ4WO-B47ypWW5LXYKXnKmtGpfwtEKNl6UWoVBQ800p8tvbXibL3U7OIQlkMIhE6Eqm5ff7kZWu6VV7V-aSJ-o>53124</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Suboptimal codegen for x86 vector rotate
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          philipc
      </td>
    </tr>
</table>

<pre>
    https://godbolt.org/z/Y3PqrWexa

(Note: as seen in the godbolt link, I originally hit this issue in Rust, but it appears to apply to C++ as well.)

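This code rotates the elements within each 128-bit lane and then blends in a new first element (carrying the last element of the low lane into the high lane), i.e. it shifts the whole vector up by one element.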
```C++
#include <x86intrin.h>

__m256 element_shift(__m256 x, const float *shift_in) {
    __m256 y = _mm256_permute_ps(x, 0x93);
    __m256 low = _mm256_insertf128_ps(
        _mm256_castps128_ps256(_mm_load_ss(shift_in)),
        _mm256_extractf128_ps(y, 0),
        1
    );
    return _mm256_blend_ps(y, low, 0x11);
}
```

Clang (suboptimal):
```asm
        vpermilps       xmm1, xmm0, 144         # xmm1 = xmm0[0,0,1,2]
        vextractf128    xmm2, ymm0, 1
        vblendps        xmm0, xmm2, xmm0, 8             # xmm0 = xmm2[0,1,2],xmm0[3]
        vpermilps       xmm0, xmm0, 147         # xmm0 = xmm0[3,0,1,2]
        vinsertf128     ymm0, ymm1, xmm0, 1
        vmovss  xmm1, dword ptr [rdi]           # xmm1 = mem[0],zero,zero,zero
        vblendps        ymm0, ymm0, ymm1, 1             # ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]

```

GCC/ICC (expected result):
```asm
        vpermilps       ymm0, ymm0, 147
        vmovss  xmm1, DWORD PTR [rdi]
        vinsertf128     ymm1, ymm1, xmm0, 0x1
        vblendps        ymm0, ymm0, ymm1, 17
```

This IR gives the expected result when passed to `llc`:
```
; ModuleID = 'shift32x8.4f1ccf74-cgu.0'
source_filename = "shift32x8.4f1ccf74-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: mustprogress nofree nosync nounwind nonlazybind uwtable willreturn
define <8 x float> @element_shift(<8 x float> %x, float* %shift_in) unnamed_addr #0 {
start:
  ; vpermilps   ymm0, ymm0, 147         # ymm0 = ymm0[3,0,1,2,7,4,5,6]
  %0 = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>

  ; vmovss      xmm1, dword ptr [rdi]           # xmm1 = mem[0],zero,zero,zero
  %1 = load float, float* %shift_in, align 4
  %2 = insertelement <4 x float> poison, float %1, i32 0

  ; vinsertf128 ymm1, ymm1, xmm0, 1
  %3 = shufflevector <4 x float> %2, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = shufflevector <8 x float> %0, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = shufflevector <4 x float> %4, <4 x float> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
  %6 = shufflevector <8 x float> %3, <8 x float> %5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>

  ; vblendps    ymm0, ymm0, ymm1, 17            # ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
  %7 = shufflevector <8 x float> %6, <8 x float> %0, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 13, i32 14, i32 15>

  ret <8 x float> %7
}

attributes #0 = { mustprogress nofree nosync nounwind nonlazybind uwtable willreturn "target-cpu"="x86-64" "target-features"="+avx" }
```

However, optimizing it first with `opt -O1` results in a different, but still suboptimal, instruction sequence:
```asm
        vpermilps       xmm1, xmm0, 255         # xmm1 = xmm0[3,3,3,3]
        vinsertf128     ymm1, ymm0, xmm1, 1
        vmovss  xmm2, dword ptr [rdi]           # xmm2 = mem[0],zero,zero,zero
        vblendps        ymm1, ymm1, ymm2, 1             # ymm1 = ymm2[0],ymm1[1,2,3,4,5,6,7]
        vunpcklpd       ymm1, ymm1, ymm0        # ymm1 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
        vshufps ymm0, ymm1, ymm0, 152           # ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy1WFmTozYQ_jX4RWUXCPDx4IcZeyfZh2S3Jlu1lSeXDMJWVkgECR_z69MSNz7GO0lcBiShvr5Wd0tsZXxe7rXOlOM_OfgF_jsZbyXXE5nvoPcG15_-17_z7_REHHftuE_VHc9_l5oCGSIKKUoFYgLpPUUVA8SZ-OHgFUrPiClVUCRztmOCcG4GzOzXQmkzY1toxLRhkiqkJSJZBnOgsXLwM_yNhCPlfOLgRVeHb3vgE8mYolxqoqmy8jkR0CIiBhGK5hraSNAjSliuNKKcplToScVn6pb_SlJtm89ExAtg7Pir03zKhM6ZmOwd_1NX_maT4nBas9yoPUvAnnk1fDKmRVKA0IRLopGDn-yUDRNgCHJmlTgEv4rmDALXaJOaziajeVpougHn4Lnl5p4WvsHAv6Tk8tilLU1PPNDGkrfzLU05KSIKPF_Ogb5RPU03oGu8UYaoo639r65yoSedk6gj62xVvUbhtd2hGTnVRS5qnltORdzhBtaV9ntel9CZrQd-7LpnBSthh4wdxVZmmqWEW-KnAQ1RaV_Ng0Ge8UxV_VOaekY8PF3z9IKgngrsffvegm8nhM9mkrkMEXbC9YB7B7CKOzZczzX3wXSLRaNLo0RNVvfnqPur1HJrtXClVqMSXlXK-pcKXprv9s2f3ZRTcrxrfrs0bb82-zwEeUCWyoNSrS_io8xjlOkcgcQ8ZiDn0vzSKylNrfXW6jeay8HjLtwd9Xpqehdwn2sY7JRG4LkEpQQDrww4AVwhXBBxq1kD0J2V_MsK8tPL59XKrGZ6ymikaQwhowquP7akh2aBT-_jvf7-5XWNvn57bfF-16_eNb9CDH8Q8dkdhGwp-PyKduxQlYEBTOi4hxKVEaVgCEoL0HMeGS4X4FVd_xn9JuOC089r61YHz2xC9PFpPgkSL4qSWTCOdsUE1Kx0U7LII7pJGJhEUlrR4dt0uKTTJN9RjWKiCSdnCfWwoqTjFBSk4wzPjKY-tjfT9fpd05oG9jZmdSOZGyLwyFjMTWNa08C7P2B4KB-qXMYbraHwbWBiIX4IeRRjqOfFabwTRUPVAvVSiEgzKdCT1rnZS6AUanuWyx2gr5CQSU4pPNRZRPAoxJFBfRZScPJ23pp2cdQEFgI6Ms7LWlByj2nChK3Dc3QqCylUYeQE7rDsXkzBoS2a5QB-MgPd-lsI46J4Q-I4NwHsthVZAR66WRgQ3mBiE0GOu7gSPTdTwWVGNEHfTQJtJIGGJZ3aF0nC6QFWsMwvja8sG45nkikpOm-YcfYn04MWMlqYIeTWDa9u4LoxqxtB3QjrxnSw-6lgsXkCMPn_EjOYW843O5Panbf8ukKEs50A_VtqbKnL_FStGoNJcB27erMWei1el2a32a5cD1dznddRwr_u2GDgWFy574ZyVx17x5-NzwsBkfRop3a0VTx4bEW6767I4OcU7ykRPoZe8AH0fgqZ9yNooPj0MfT8a-gZu_-V071myGvHmsj2wusRXdXjK3muLcUf3fzUo2ZO0JtzuSGy-M0ew296Az_3ffwWDR4tpA2mwU8CCKXrmhr11qU5r9g7gWLJ4PALO5ay_pi6O3v-D2qnKd9lTR9HmS3Y_rqs6GPYG2DceZ9QAhRUNZPgGEwOJzvp7vHqV3kEh-QGDHu6Ym8Mjltwji8P2kem92aXBe_Q-IsHrWonZo_-MUsSmptE3B7OTIbWeWE3EurOnhYWZrcYD09nJllcLTed40l7tbvvxc2cXp9-vE5ON_M7pQ8_XPrw46XPyOgFY6_InCupV08iXh2MuBeM3oMnkcWhEFn0g2fxdcHuLXGXsd8XjnvvcE-kCfJh2um5wINK_l7aaY63vdTTTzvVpmuQesLhYh_FSz9e-Asy0kxzuvyjXanmo9MOzhMJZCMIKlQlpvIz1KjI-fCjGgRDsZ1EMoUO
54f6MYYw_wtooWu_kUEUvoS-h4PRfkkCTBYhjv1gOpvjZD7zw-k2xpFPpu4iwnjEyZZytQQDIFjNFy7LwkRwuB6xJXYxdj3Pcxehj6cTsnXjxTwK48DdYrxwzQ46JYxPjB7ma98oX1qVtsVOwUvOlFbtSzg4wbaKUisO-JNC72W-zPaMsywaWdFLq_o_31-KKQ">