<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/121311">121311</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            FMA not optimized for wasm relaxed-simd
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          kzhsw
      </td>
    </tr>
</table>

<pre>
    Env: Compiler Explorer (<https://godbolt.org/z/Yxc6e8sd4>)
Version: WebAssembly clang (trunk)

Code:
<details><summary>Code</summary>
<p>

```C
#include <wasm_simd128.h>

/*
 * Variant 1: multiply-then-add written with the wasm_simd128.h intrinsics.
 *
 * Loads one v128 from each of a, b and c, computes (a * b) + c using
 * wasm_f32x4_mul followed by wasm_f32x4_add, and stores the result to dest.
 * Per the "Actual" listing in this report, this is emitted as separate
 * f32x4.mul and f32x4.add instructions rather than f32x4.relaxed_madd.
 */
void fma_inst(float * a, float * b, float * c, float * dest)  {
    v128_t va, vb, vc;
    va = wasm_v128_load(a);
    vb = wasm_v128_load(b);
    vc = wasm_v128_load(c);
    va = wasm_f32x4_mul(va, vb);   /* va = a * b */
    va = wasm_f32x4_add(va, vc);   /* va = (a * b) + c */
    wasm_v128_store(dest, va);
}

/*
 * Variant 2: multiply-then-add written with the C `*` and `+` operators on
 * the __f32x4 vector type.
 *
 * Loads one vector from each of a, b and c, evaluates (va * vb) + vc, and
 * stores the result to dest. Per the "Actual" listing in this report, this
 * is emitted as separate f32x4.mul and f32x4.add instructions rather than
 * f32x4.relaxed_madd.
 */
void fma_operator(float * a, float * b, float * c, float * dest) {
    __f32x4 va, vb, vc;
    va = wasm_v128_load(a);
    vb = wasm_v128_load(b);
    vc = wasm_v128_load(c);
    va = (va * vb) + vc;
 wasm_v128_store(dest, va);
}

/*
 * Variant 3: fused multiply-add requested through the generic
 * __builtin_elementwise_fma builtin on __f32x4 operands.
 *
 * Loads one vector from each of a, b and c, calls
 * __builtin_elementwise_fma(va, vb, vc), and stores the result to dest.
 * Per the "Actual" listing in this report, this is emitted as four
 * lane-by-lane extract_lane / `call fmaf` sequences (lanes 0..3) rather
 * than a single f32x4.relaxed_madd.
 */
void fma_buildin(float * a, float * b, float * c, float * dest)  {
    __f32x4 va, vb, vc;
    va = wasm_v128_load(a);
    vb = wasm_v128_load(b);
    vc = wasm_v128_load(c);
    va = __builtin_elementwise_fma(va, vb, vc);
 wasm_v128_store(dest, va);
}

/*
 * Reference variant: calls the __builtin_wasm_relaxed_madd_f32x4 builtin
 * directly.
 *
 * Loads one vector from each of a, b and c, passes them to
 * __builtin_wasm_relaxed_madd_f32x4(va, vb, vc), and stores the result to
 * dest. Per the "Expected" listing in this report, this is emitted as a
 * single f32x4.relaxed_madd instruction, which is the code the reporter
 * wants for the other three variants as well.
 */
void fma_expected(float * a, float * b, float * c, float * dest)  {
    __f32x4 va, vb, vc;
    va = wasm_v128_load(a);
    vb = wasm_v128_load(b);
    vc = wasm_v128_load(c);
    va = __builtin_wasm_relaxed_madd_f32x4(va, vb, vc);
 wasm_v128_store(dest, va);
}
```

</p>
</details> 

Flags: `-O3 -msimd128 -mrelaxed-simd -ffast-math`

Expected:
All implementations should be optimized to the same code as the one using the intrinsic (a single `f32x4.relaxed_madd`).
```wat
fma_expected:
        local.get       3
        local.get       0
        v128.load       0:p2align=0
        local.get       1
        v128.load       0:p2align=0
        local.get       2
        v128.load       0:p2align=0
        f32x4.relaxed_madd
        v128.store      0:p2align=0
        end_function
```

Actual:
```wat
fma_inst:
        local.get       3
        local.get       1
        v128.load       0:p2align=0
        local.get       0
        v128.load       0:p2align=0
        f32x4.mul
        local.get       2
        v128.load       0:p2align=0
        f32x4.add
        v128.store      0:p2align=0
        end_function

fma_operator:
        local.get       3
        local.get       1
        v128.load       0:p2align=0
        local.get       0
        v128.load       0:p2align=0
        f32x4.mul
        local.get       2
        v128.load       0:p2align=0
        f32x4.add
        v128.store      0:p2align=0
        end_function

fma_buildin:
        local.get       3
        local.get       0
        v128.load       0:p2align=0
        local.tee       4
        f32x4.extract_lane      0
        local.get       1
        v128.load       0:p2align=0
        local.tee       5
        f32x4.extract_lane      0
        local.get       2
        v128.load       0:p2align=0
        local.tee       6
        f32x4.extract_lane      0
        call    fmaf
        f32x4.splat
        local.get       4
        f32x4.extract_lane      1
        local.get       5
        f32x4.extract_lane      1
        local.get       6
        f32x4.extract_lane      1
        call    fmaf
        f32x4.replace_lane      1
        local.get       4
        f32x4.extract_lane      2
        local.get       5
        f32x4.extract_lane      2
        local.get       6
        f32x4.extract_lane      2
        call    fmaf
        f32x4.replace_lane      2
        local.get       4
        f32x4.extract_lane      3
        local.get       5
        f32x4.extract_lane      3
        local.get       6
        f32x4.extract_lane      3
        call    fmaf
        f32x4.replace_lane      3
        v128.store      0:p2align=0
        end_function
```

Other info:
The same optimizations apply to x86: <https://godbolt.org/z/jYKMEq4rM>
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzkWE9v6jgQ_zTmYoESO_w75JBSclk9vctqV-8UOfEE_GrH2dihtJ9-FSclUGia9nWlJ22EBLbnN39-M8wYmDFiVwCEaH6H5vcTVtu9rsKH5715nKSaP4Xb4oBohDdalUJCheFYSl1BhRFZIbrZW1saRCNEYkTineaplnamqx0i8TMi8Y9jtoCV4QGiW0TWyIv-gsoIXTRK_4Y0MgZUKp9wJlmxa5Taqi4eWlHkRRvNoVHvRYhuOFgmpGlU0Y2plWLVE6LbVmaDSNzvtYCy--RFaOG1r02zIFQUmaw5YEQ3j8yoxAjFfbKa7U-IgxYc54olojAWkVUuNbMYkQgzRDa4X6aXy-xyyaFBrzFGyzvkRRhjfPDJKrH44PQcHPyQIXo6ZhjRe-zccqJSM47IijWk9FLpban0Uiq7LZVdSp1ZzCk5BomqJSKr3sNBYcZ5L3ypuTdsrK4AkVXLx8ZF3wmi5f0rynUJFbO6-nXae9aT1tvfj3bHnXPaMY0Ruesd-ySBaS0kF8WXlu1vS2DiwrWiSECCgsI-CgNJrthFCV_W5id5hWMJmQX-fyPWyVcg2RF4ohjnrctfRPBLb-5atWvl5amJIxL3jR-3MrFku2bsYLTwpt8pnqqugeOp6tycNjt4mufM2Klidn8ysH1JopsrkZRYqFIarEsrlHgGjqV4AFwbUeywKGwlCiOy2bmnj8wiL7qoCKcMd4_UGZOzHdhuTZuz15veGaBhatbk4nRGo5IwKXYFovfeTdX-L-LbhzRn4-Eu77PzUnjthcv3gAYoeJLXRWabW8BV9qPM1kx2I_-abjeMW6pvcvxGkLepGk_SB1PVktQM0Xd4_4BLrc43-X7LlddstzSeBuz7VfsflZ33mbIbwehHVX5BAbeUvozcNxgdqs4PVlcLt9AlHQdXQcHRViyziWQFXJv4mlT29ud9yY81fJawK_PjDS_GBp4xKZ2QYvkVxpTStZe3vAyGwvMHgIO8DAEXY4GDcVVQSpZBCx0yN6Z-yK2OOx8LvDI8JnNkINAbEQ5YG5HHoa_nYB6HgIN5pJ_KI_1Y9--e4ZH73e6hwqLIddu8_twDNkzBy4WINTCDWVnKJ2w1Pq4W7ur1_q__nz_--Lb9J6i-Ibqd8JDyNV2zCYT-ks7nJFjOyWQf0nSxZvN1TtZrL18vPbIOApivVowD9daLfCJC4pHAJ9TziE8omQUeX6Z0wUia5ksW-CjwQDEhZ1IeVGN-IoypIfSJT31_IlkK0ri_OAgp4BG7U0QImt9PqrABTdN6Z1DgSWGs6dVYYSWEsWK40Pbsfpjryl1w8fldc1JXMnzFiLD7Op1lWiESN1q7t2lZ6Z-QWURi54tBJO6cPYTk3wAAAP__CW7cBA">