[llvm-bugs] [Bug 37387] New: MSVC rotate intrinsics don't (just) generate rotates on x86-64
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue May 8 21:09:07 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=37387
Bug ID: 37387
Summary: MSVC rotate intrinsics don't (just) generate rotates
on x86-64
Product: clang
Version: 6.0
Hardware: PC
OS: Windows NT
Status: NEW
Severity: normal
Priority: P
Component: LLVM Codegen
Assignee: unassignedclangbugs at nondot.org
Reporter: fabiang at radgametools.com
CC: llvm-bugs at lists.llvm.org
This simple test:
// ---- begin
#include <intrin.h>
extern "C" unsigned long long f(unsigned long long a, int b)
{
return _rotl64(a, b);
}
extern "C" unsigned long long g(unsigned long long a, int b)
{
return (a << (b & 63)) | (a >> (-b & 63));
}
// ---- end
produces (on x86-64 using clang 6.0 release; only quoting the relevant bits):
# ---- begin
f: # @f
# %bb.0:
movq %rcx, %r8
andl $63, %edx
movq %r8, %rax
movl %edx, %ecx
rolq %cl, %rax
testl %edx, %edx
cmoveq %r8, %rax
retq
g: # @g
# %bb.0:
movq %rcx, %rax
movl %edx, %ecx
rolq %cl, %rax
retq
# ---- end
The corresponding IR is:
; ---- begin
; Function Attrs: norecurse nounwind readnone sspstrong uwtable
define i64 @f(i64, i32) local_unnamed_addr #0 {
%3 = and i32 %1, 63
%4 = zext i32 %3 to i64
%5 = sub nsw i64 64, %4
%6 = shl i64 %0, %4
%7 = lshr i64 %0, %5
%8 = or i64 %7, %6
%9 = icmp eq i32 %3, 0
%10 = select i1 %9, i64 %0, i64 %8
ret i64 %10
}
; Function Attrs: norecurse nounwind readnone sspstrong uwtable
define i64 @g(i64, i32) local_unnamed_addr #0 {
%3 = and i32 %1, 63
%4 = zext i32 %3 to i64
%5 = shl i64 %0, %4
%6 = sub nsw i32 0, %1
%7 = and i32 %6, 63
%8 = zext i32 %7 to i64
%9 = lshr i64 %0, %8
%10 = or i64 %5, %9
ret i64 %10
}
; ---- end
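The problem is the expansion chosen for the rotr/rotl intrinsics in
CodeGenFunction::EmitBuiltinExpr (CGBuiltin.cpp). It guards the rotate with a
select on a zero shift count, presumably to avoid the undefined result of the
right shift by 64-b, which is a shift by the full bit width when the masked
count is 0.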
Note that the alternative expansion for rotate-left given in the code for g
avoids the problematic select, is well-defined (both shift counts are masked
to the range 0..63), and already gets matched to ROL (in the x86-64 backend
anyway), so it seems like a good replacement.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180509/b9f4081f/attachment-0001.html>
More information about the llvm-bugs
mailing list