[llvm-bugs] [Bug 37387] New: MSVC rotate intrinsics don't (just) generate rotates on x86-64

Tue May 8 21:09:07 PDT 2018

https://bugs.llvm.org/show_bug.cgi?id=37387

            Bug ID: 37387
           Summary: MSVC rotate intrinsics don't (just) generate rotates
                    on x86-64
           Product: clang
           Version: 6.0
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: normal
          Priority: P
         Component: LLVM Codegen
          Assignee: unassignedclangbugs at nondot.org
          Reporter: fabiang at radgametools.com
                CC: llvm-bugs at lists.llvm.org

This simple test:

// ---- begin
#include <intrin.h>

extern "C" unsigned long long f(unsigned long long a, int b)
{
    return _rotl64(a, b);
}

extern "C" unsigned long long g(unsigned long long a, int b)
{
    return (a << (b & 63)) | (a >> (-b & 63));
}
// ---- end

produces (on x86-64 using clang 6.0 release; only quoting the relevant bits):

# ---- begin
f:                                      # @f
# %bb.0:
        movq    %rcx, %r8
        andl    $63, %edx
        movq    %r8, %rax
        movl    %edx, %ecx
        rolq    %cl, %rax
        testl   %edx, %edx
        cmoveq  %r8, %rax
        retq

g:                                      # @g
# %bb.0:
        movq    %rcx, %rax
        movl    %edx, %ecx
        rolq    %cl, %rax
        retq
# ---- end

The corresponding IR is:

; ---- begin
; Function Attrs: norecurse nounwind readnone sspstrong uwtable
define i64 @f(i64, i32) local_unnamed_addr #0 {
  %3 = and i32 %1, 63
  %4 = zext i32 %3 to i64
  %5 = sub nsw i64 64, %4
  %6 = shl i64 %0, %4
  %7 = lshr i64 %0, %5
  %8 = or i64 %7, %6
  %9 = icmp eq i32 %3, 0
  %10 = select i1 %9, i64 %0, i64 %8
  ret i64 %10
}

; Function Attrs: norecurse nounwind readnone sspstrong uwtable
define i64 @g(i64, i32) local_unnamed_addr #0 {
  %3 = and i32 %1, 63
  %4 = zext i32 %3 to i64
  %5 = shl i64 %0, %4
  %6 = sub nsw i32 0, %1
  %7 = and i32 %6, 63
  %8 = zext i32 %7 to i64
  %9 = lshr i64 %0, %8
  %10 = or i64 %5, %9
  ret i64 %10
}

; ---- end

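The problem is the expansion chosen for the rotr/rotl intrinsics in
CGBuiltin.cpp CodeGenFunction::EmitBuiltinExpr: it guards the zero-count case
with a compare and select (the icmp/select in the IR for f, which becomes the
testl/cmoveq pair in the assembly), presumably to avoid the undefined result of
the right shift by 64-b, which is a shift by the full 64 bits when the masked
count is 0.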

Note that the alternative expansion for rotate-left given in the code for g
masks both shift counts to the range 0-63, so it avoids the problematic select,
is well-defined, and already gets matched to ROL (in the x86-64 backend
anyway); it seems like a good replacement.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180509/b9f4081f/attachment-0001.html>