<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Unusual code generation for addcarryx (ADCX and ADOX)"
href="https://bugs.llvm.org/show_bug.cgi?id=34292">34292</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Unusual code generation for addcarryx (ADCX and ADOX)
</td>
</tr>
<tr>
<th>Product</th>
<td>clang
</td>
</tr>
<tr>
<th>Version</th>
<td>3.9
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Windows NT
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>LLVM Codegen
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedclangbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>noloader@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>Created <span class=""><a href="attachment.cgi?id=19032" name="attach_19032" title="Test file">attachment 19032</a> <a href="attachment.cgi?id=19032&amp;action=edit" title="Test file">[details]</a></span>
Test file
LLVM is producing unusual code for ADCX and ADOX.
The source file below (zboson.cxx) is modified from
<a href="http://stackoverflow.com/q/33690791/608639">http://stackoverflow.com/q/33690791/608639</a>.
All the tests were run on the same machine. The machine is a 6th gen Core i5
(Skylake). It includes ADX cpu features.
********************
$ cat zboson.cxx
#include &lt;x86intrin.h&gt;
#include &lt;stdint.h&gt;
#define LEN 4 // N = N*64-bit add e.g. 4=256-bit add, 3=192-bit add, ...
static unsigned char c = 0;
template&lt;int START, int N&gt;
struct Repeat {
static void add (uint64_t *x, uint64_t *y) {
#if defined(__INTEL_COMPILER)
const uint64_t* a = x;
uint64_t* b = y;
#else
const long long unsigned int* a = reinterpret_cast&lt;const long long
unsigned int*&gt;(x);
long long unsigned int* b = reinterpret_cast&lt;long long unsigned
int*&gt;(y);
#endif
c = _addcarryx_u64(c, a[START], b[START], &b[START]);
Repeat&lt;START+1, N&gt;::add(x,y);
}
};
template&lt;int N&gt;
struct Repeat&lt;LEN, N&gt; {
static void add (uint64_t *x, uint64_t *y) {}
};
void sum_unroll(uint64_t *x, uint64_t *y) {
Repeat<0,LEN>::add(x,y);
}
********************
$ clang++ -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o
zboson.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z10sum_unrollPmS_>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 8a 05 00 00 00 00 mov 0x0(%rip),%al # a
<_Z10sum_unrollPmS_+0xa>
a: 48 8b 0f mov (%rdi),%rcx
d: 04 ff add $0xff,%al
f: 66 48 0f 38 f6 0e adcx (%rsi),%rcx
15: 48 89 0e mov %rcx,(%rsi)
18: 50 push %rax
19: 48 89 f8 mov %rdi,%rax
1c: 04 7f add $0x7f,%al
1e: 9e sahf
1f: 58 pop %rax
20: 0f 92 c0 setb %al
23: 48 8b 4f 08 mov 0x8(%rdi),%rcx
27: 04 ff add $0xff,%al
29: 66 48 0f 38 f6 4e 08 adcx 0x8(%rsi),%rcx
30: 48 89 4e 08 mov %rcx,0x8(%rsi)
34: 50 push %rax
35: 48 89 f8 mov %rdi,%rax
38: 04 7f add $0x7f,%al
3a: 9e sahf
3b: 58 pop %rax
3c: 0f 92 c0 setb %al
3f: 48 8b 4f 10 mov 0x10(%rdi),%rcx
43: 04 ff add $0xff,%al
45: 66 48 0f 38 f6 4e 10 adcx 0x10(%rsi),%rcx
4c: 48 89 4e 10 mov %rcx,0x10(%rsi)
50: 50 push %rax
51: 48 89 f8 mov %rdi,%rax
54: 04 7f add $0x7f,%al
56: 9e sahf
57: 58 pop %rax
58: 0f 92 c0 setb %al
5b: 48 8b 4f 18 mov 0x18(%rdi),%rcx
5f: 04 ff add $0xff,%al
61: 66 48 0f 38 f6 4e 18 adcx 0x18(%rsi),%rcx
68: 48 89 4e 18 mov %rcx,0x18(%rsi)
6c: 50 push %rax
6d: 48 89 f8 mov %rdi,%rax
70: 04 7f add $0x7f,%al
72: 9e sahf
73: 58 pop %rax
74: 0f 92 c0 setb %al
77: 88 05 00 00 00 00 mov %al,0x0(%rip) # 7d
<_Z10sum_unrollPmS_+0x7d>
7d: 5d pop %rbp
7e: c3 retq
********************
$ icc -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o
zboson.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z10sum_unrollPmS_>:
0: 45 33 c9 xor %r9d,%r9d
3: 0f b6 05 00 00 00 00 movzbl 0x0(%rip),%eax # a
<_Z10sum_unrollPmS_+0xa>
a: 44 3b c8 cmp %eax,%r9d
d: 48 8b 17 mov (%rdi),%rdx
10: 66 48 0f 38 f6 16 adcx (%rsi),%rdx
16: 48 89 16 mov %rdx,(%rsi)
19: 48 8b 4f 08 mov 0x8(%rdi),%rcx
1d: 66 48 0f 38 f6 4e 08 adcx 0x8(%rsi),%rcx
24: 48 89 4e 08 mov %rcx,0x8(%rsi)
28: 4c 8b 47 10 mov 0x10(%rdi),%r8
2c: 66 4c 0f 38 f6 46 10 adcx 0x10(%rsi),%r8
33: 4c 89 46 10 mov %r8,0x10(%rsi)
37: 4c 8b 57 18 mov 0x18(%rdi),%r10
3b: 66 4c 0f 38 f6 56 18 adcx 0x18(%rsi),%r10
42: 4c 89 56 18 mov %r10,0x18(%rsi)
46: 41 0f 92 c1 setb %r9b
4a: 44 88 0d 00 00 00 00 mov %r9b,0x0(%rip) # 51
<_Z10sum_unrollPmS_+0x51>
51: c3 retq
52: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
59: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
********************
$ g++ -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o
zboson.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z10sum_unrollPmS_>:
0: 0f b6 05 00 00 00 00 movzbl 0x0(%rip),%eax # 7
<_Z10sum_unrollPmS_+0x7>
7: 04 ff add $0xff,%al
9: 48 8b 07 mov (%rdi),%rax
c: 48 13 06 adc (%rsi),%rax
f: 0f 92 c2 setb %dl
12: 48 89 06 mov %rax,(%rsi)
15: 48 8b 47 08 mov 0x8(%rdi),%rax
19: 80 c2 ff add $0xff,%dl
1c: 48 13 46 08 adc 0x8(%rsi),%rax
20: 0f 92 c2 setb %dl
23: 48 89 46 08 mov %rax,0x8(%rsi)
27: 48 8b 47 10 mov 0x10(%rdi),%rax
2b: 80 c2 ff add $0xff,%dl
2e: 48 13 46 10 adc 0x10(%rsi),%rax
32: 0f 92 c2 setb %dl
35: 48 89 46 10 mov %rax,0x10(%rsi)
39: 48 8b 47 18 mov 0x18(%rdi),%rax
3d: 80 c2 ff add $0xff,%dl
40: 48 13 46 18 adc 0x18(%rsi),%rax
44: 48 89 46 18 mov %rax,0x18(%rsi)
48: 0f 92 05 00 00 00 00 setb 0x0(%rip) # 4f
<_Z10sum_unrollPmS_+0x4f>
4f: c3 retq
********************
Switching from _addcarryx_u64 to _addcarry_u64 produces the same unusual code:
c = _addcarry_u64(c, a[START], b[START], &b[START]);
Repeat&lt;START+1, N&gt;::add(x,y);
$ clang++ -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o
zboson.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <_Z10sum_unrollPmS_>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 8a 05 00 00 00 00 mov 0x0(%rip),%al # a
<_Z10sum_unrollPmS_+0xa>
a: 48 8b 0f mov (%rdi),%rcx
d: 04 ff add $0xff,%al
f: 66 48 0f 38 f6 0e adcx (%rsi),%rcx
15: 48 89 0e mov %rcx,(%rsi)
18: 50 push %rax
19: 48 89 f8 mov %rdi,%rax
1c: 04 7f add $0x7f,%al
1e: 9e sahf
1f: 58 pop %rax
20: 0f 92 c0 setb %al
23: 48 8b 4f 08 mov 0x8(%rdi),%rcx
27: 04 ff add $0xff,%al
29: 66 48 0f 38 f6 4e 08 adcx 0x8(%rsi),%rcx
30: 48 89 4e 08 mov %rcx,0x8(%rsi)
34: 50 push %rax
35: 48 89 f8 mov %rdi,%rax
38: 04 7f add $0x7f,%al
3a: 9e sahf
3b: 58 pop %rax
3c: 0f 92 c0 setb %al
3f: 48 8b 4f 10 mov 0x10(%rdi),%rcx
43: 04 ff add $0xff,%al
45: 66 48 0f 38 f6 4e 10 adcx 0x10(%rsi),%rcx
4c: 48 89 4e 10 mov %rcx,0x10(%rsi)
50: 50 push %rax
51: 48 89 f8 mov %rdi,%rax
54: 04 7f add $0x7f,%al
56: 9e sahf
57: 58 pop %rax
58: 0f 92 c0 setb %al
5b: 48 8b 4f 18 mov 0x18(%rdi),%rcx
5f: 04 ff add $0xff,%al
61: 66 48 0f 38 f6 4e 18 adcx 0x18(%rsi),%rcx
68: 48 89 4e 18 mov %rcx,0x18(%rsi)
6c: 50 push %rax
6d: 48 89 f8 mov %rdi,%rax
70: 04 7f add $0x7f,%al
72: 9e sahf
73: 58 pop %rax
74: 0f 92 c0 setb %al
77: 88 05 00 00 00 00 mov %al,0x0(%rip) # 7d
<_Z10sum_unrollPmS_+0x7d>
7d: 5d pop %rbp
7e: c3 retq
********************
$ clang++ --version
clang version 4.0.0 (tags/RELEASE_400/final)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /bin
$ g++ --version
g++ (GCC) 7.1.1 20170622 (Red Hat 7.1.1-3)
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
$ icc --version
icc (ICC) 17.0.4 20170411
Copyright (C) 1985-2017 Intel Corporation. All rights reserved.</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>