<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - [X86][MMX] hoisting behavior of mmx intrinsics and _mm_empty"
   href="https://bugs.llvm.org/show_bug.cgi?id=50375">50375</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>[X86][MMX] hoisting behavior of mmx intrinsics and _mm_empty
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Windows NT
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: X86
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>greg.bedwell@sony.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, pengfei.wang@intel.com, spatel+llvm@rotateright.com
          </td>
        </tr></table>
      <p>
        <div>
        <pre>I'll preface this by saying that I've only observed this in code generated by
one of our random fuzzers, so as far as I am aware there is no real-world
use-case for this.  Nevertheless I thought it was worthy of at least a question
here.

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ cat test.cpp
// ----------------------------------------------------------------------------
// From  lib/clang/13.0.0/include/mmintrin.h

typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
typedef long long __v1di __attribute__((__vector_size__(8)));

static __inline__ __m64
    __attribute__((__always_inline__, __nodebug__, __target__("mmx"),
                   __min_vector_width__(64)))
    _mm_srli_si64(__m64 __m, int __count) {
  return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
}

static __inline__ void
    __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
    _mm_empty(void) {
  __builtin_ia32_emms();
}
// ----------------------------------------------------------------------------

using ll = long long;
using uchar = unsigned char;

void test86() {
  volatile ll id18225 = 1775742612LL >> 52;
  for (uchar id18207_idx = 0; (id18207_idx < 21) && (812102100LL == id18225);
       ++id18207_idx) {
    for (uchar id18210_idx = 0; (id18210_idx < 100); ++id18210_idx) {
      __m64 id18213 = {1};
      volatile __m64 id18212 = _mm_srli_si64(id18213, 63);
      _mm_empty();
    }
  }
}

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In theory this testcase has been written to be safe w.r.t. the mmx state. 
Every time _mm_srli_si64 is executed, we should also, in theory, execute
_mm_empty.

If you look at the clang O2 codegen though, we get this:


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ bin\clang.exe --version
clang version 13.0.0 (<a href="https://github.com/llvm/llvm-project.git">https://github.com/llvm/llvm-project.git</a>
6052a8a53559d667321637f7159353ab724a1141)
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: g:\llvm-project\build\bin

$ bin\clang.exe -target x86_64-unknown-unknown -S -O2 test.cpp -o -
        .text
        .file   "test.cpp"
        .globl  _Z6test86v                      # -- Begin function _Z6test86v
        .p2align        4, 0x90
        .type   _Z6test86v,@function
_Z6test86v:                             # @_Z6test86v
        .cfi_startproc
# %bb.0:
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        movq    $0, -16(%rbp)
        xorl    %eax, %eax
        movl    $1, %ecx
        movd    %ecx, %mm0
        psrlq   $63, %mm0
        movq    %mm0, %rcx
        .p2align        4, 0x90
.LBB0_1:                                # =>This Loop Header: Depth=1
                                        #     Child Loop BB0_3 Depth 2
        movq    -16(%rbp), %rdx
        cmpq    $812102100, %rdx                # imm = 0x3067B1D4
        jne     .LBB0_5
# %bb.2:                                #   in Loop: Header=BB0_1 Depth=1
        movb    $100, %dl
        .p2align        4, 0x90
.LBB0_3:                                #   Parent Loop BB0_1 Depth=1
                                        # =>  This Inner Loop Header: Depth=2
        movq    %rcx, -8(%rbp)
        emms
        movq    %rcx, -8(%rbp)
        emms
        movq    %rcx, -8(%rbp)
        emms
        movq    %rcx, -8(%rbp)
        emms
        movq    %rcx, -8(%rbp)
        emms
        addb    $-5, %dl
        jne     .LBB0_3
# %bb.4:                                #   in Loop: Header=BB0_1 Depth=1
        addb    $1, %al
        cmpb    $21, %al
        jne     .LBB0_1
.LBB0_5:
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end0:
        .size   _Z6test86v, .Lfunc_end0-_Z6test86v
        .cfi_endproc
                                        # -- End function
        .ident  "clang version 13.0.0 (<a href="https://github.com/llvm/llvm-project.git">https://github.com/llvm/llvm-project.git</a>
6052a8a53559d667321637f7159353ab724a1141)"
        .section        ".note.GNU-stack","",@progbits
        .addrsig

$

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The compiler has realised that the _mm_srli_si64 is invariant and has
hoisted the psrlq out of the loop so it is now executed unconditionally.  The
emms instructions have been left inside the inner loop though, and in this
particular case due to the outer loop condition are never executed. My
assumption is that the emms has not been hoisted due to the hasSideEffects flag. My
question is, should every instruction that might change the MMX technology
state also be marked as hasSideEffects, or is that overkill?  Is there a
better approach?  (assuming that anyone actually cares enough about MMX to
implement one :) ).</pre>
        </div>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>