[llvm-bugs] [Bug 38217] New: Clang/LLVM optimizes division and modulo worse than MSVC, part 2
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed Jul 18 19:58:10 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=38217
Bug ID: 38217
Summary: Clang/LLVM optimizes division and modulo worse than
MSVC, part 2
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: normal
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: sfinae at hotmail.com
CC: llvm-bugs at lists.llvm.org
Created attachment 20570
--> https://bugs.llvm.org/attachment.cgi?id=20570&action=edit
Test case
This appears to be a different bug than
https://bugs.llvm.org/show_bug.cgi?id=37983 "Clang/LLVM optimizes division and
modulo worse than MSVC" (which is probably a duplicate of
https://bugs.llvm.org/show_bug.cgi?id=23106 "Division followed by modulo
generates longer machine code than vice versa") because it involves modulo
followed by division.
This affects the Ryu algorithm for printing floating-point numbers
(https://github.com/ulfjack/ryu ) and therefore affects C++17 floating-point
std::to_chars().
I observe that MSVC's codegen is unaffected by WORKAROUND, while Clang/LLVM
generates less assembly code (which is faster when profiled in the real
algorithm) for WORKAROUND.
Here's a Godbolt link demonstrating the codegen difference (this isn't
Windows-specific): https://godbolt.org/g/uX1AD8
C:\Temp\TESTING_X64>cl
Microsoft (R) C/C++ Optimizing Compiler Version 19.16.26504 for x64
Copyright (C) Microsoft Corporation. All rights reserved.
usage: cl [ option... ] filename... [ /link linkoption... ]
C:\Temp\TESTING_X64>clang-cl -m64 -v
clang version 6.0.0 (tags/RELEASE_600/final)
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: S:\msvc\src\vctools\NonShip\ClangLLVM\bin
C:\Temp\TESTING_X64>type d2s.cpp
#include <stdint.h>
#include <string.h>
// Lookup table holding the two-character decimal strings "00" through "99",
// concatenated in ascending order. The pair for value k starts at byte
// offset 2 * k, so a two-digit number can be emitted with one 2-byte copy.
static const char DIGIT_TABLE[] =
"0001020304050607080910111213141516171819"
"2021222324252627282930313233343536373839"
"4041424344454647484950515253545556575859"
"6061626364656667686970717273747576777879"
"8081828384858687888990919293949596979899";
// Reduced test case for the codegen comparison in this report.
//
// While `output` is at least 10000, peels off its four least-significant
// decimal digits and copies them, as two 2-character pairs taken from
// DIGIT_TABLE, to addresses that decrease relative to `result`. The first
// iteration writes result[-3] .. result[0]; each further iteration writes
// the four bytes immediately below the previous group.
//
// The remaining value of `output` (< 10000) is not written by this function.
//
// NOTE(review): the caller presumably guarantees that enough writable bytes
// exist below `result`; nothing in this snippet checks that.
//
// The exact spelling of the remainder/quotient expressions below is the
// subject of the bug report; do not "simplify" them.
void d2s_buffered(uint64_t output, char * result) {
// Number of bytes already emitted; also the backward offset from `result`.
uint32_t i = 0;
while (output >= 10000) {
#ifdef WORKAROUND
// Remainder written as output - 10000 * quotient. Arithmetically the same
// value as `output % 10000`, but per the report Clang/LLVM emits less
// assembly for this form.
const uint32_t c = (uint32_t) (output - 10000 * (output / 10000));
#else
// Plain modulo followed (below) by division by the same constant.
const uint32_t c = (uint32_t) (output % 10000);
#endif
output /= 10000;
// c is in [0, 9999]. c0 / c1 are byte offsets into DIGIT_TABLE (value * 2)
// for the low and high two-digit halves of c respectively.
const uint32_t c0 = (c % 100) << 1;
const uint32_t c1 = (c / 100) << 1;
// Low pair lands at the higher addresses, high pair just below it.
memcpy(result - i - 1, DIGIT_TABLE + c0, 2);
memcpy(result - i - 3, DIGIT_TABLE + c1, 2);
i += 4;
}
}
C:\Temp\TESTING_X64>cl /EHsc /nologo /W4 /MT /O2 /c d2s.cpp /FAsc
/Famsvc_workaround.cod /DWORKAROUND
d2s.cpp
C:\Temp\TESTING_X64>cl /EHsc /nologo /W4 /MT /O2 /c d2s.cpp /FAsc
/Famsvc_modulo.cod
d2s.cpp
C:\Temp\TESTING_X64>git diff msvc_workaround.cod msvc_modulo.cod
diff --git a/msvc_workaround.cod b/msvc_modulo.cod
index 1be1419..aff234c 100644
--- a/msvc_workaround.cod
+++ b/msvc_modulo.cod
@@ -86,11 +86,11 @@ $LL2@d2s_buffer:
; 15 : #ifdef WORKAROUND
; 16 : const uint32_t c = (uint32_t) (output - 10000 * (output / 10000));
+; 17 : #else^M
+; 18 : const uint32_t c = (uint32_t) (output % 10000);^M
00030 48 8b c7 mov rax, rdi
-; 17 : #else
-; 18 : const uint32_t c = (uint32_t) (output % 10000);
; 19 : #endif
; 20 :
; 21 : output /= 10000;
C:\Temp\TESTING_X64>clang-cl -m64 /EHsc /nologo /W4 /MT /O2 /c d2s.cpp /FA
/Faclang_workaround.asm /DWORKAROUND
C:\Temp\TESTING_X64>clang-cl -m64 /EHsc /nologo /W4 /MT /O2 /c d2s.cpp /FA
/Faclang_modulo.asm
C:\Temp\TESTING_X64>git diff clang_workaround.asm clang_modulo.asm
diff --git a/clang_workaround.asm b/clang_modulo.asm
index 2a8cb49..6026638 100644
--- a/clang_workaround.asm
+++ b/clang_modulo.asm
@@ -29,19 +29,22 @@
movq %r9, %rax
mulq %r10
shrq $11, %rdx
- imulq $-10000, %rdx, %rax # imm = 0xD8F0
- addq %r9, %rax
- movl %eax, %esi
- imulq $1374389535, %rsi, %rsi # imm = 0x51EB851F
- shrq $37, %rsi
- imull $100, %esi, %edi
- subl %edi, %eax
+ imulq $10000, %rdx, %rax # imm = 0x2710
+ movq %r9, %rsi
+ subq %rax, %rsi
+ imulq $1374389535, %rsi, %rax # imm = 0x51EB851F
+ movq %rax, %rdi
+ shrq $37, %rdi
+ imull $100, %edi, %edi
+ subl %edi, %esi
+ shrq $36, %rax
+ andl $510, %eax # imm = 0x1FE
movl %ecx, %edi
movq %r8, %rbx
subq %rdi, %rbx
- movzwl (%r11,%rax,2), %eax
- movw %ax, -1(%rbx)
- movzwl (%r11,%rsi,2), %eax
+ movzwl (%r11,%rsi,2), %esi
+ movw %si, -1(%rbx)
+ movzwl (%rax,%r11), %eax
movw %ax, -3(%rbx)
addl $4, %ecx
cmpq $99999999, %r9 # imm = 0x5F5E0FF
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180719/543bccb3/attachment.html>
More information about the llvm-bugs
mailing list