[llvm-bugs] [Bug 34292] New: Unusual code generation for addcarryx (ADCX and ADOX)

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Aug 22 18:02:32 PDT 2017


            Bug ID: 34292
           Summary: Unusual code generation for addcarryx (ADCX and ADOX)
           Product: clang
           Version: 3.9
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: LLVM Codegen
          Assignee: unassignedclangbugs at nondot.org
          Reporter: noloader at gmail.com
                CC: llvm-bugs at lists.llvm.org

Created attachment 19032
  --> https://bugs.llvm.org/attachment.cgi?id=19032&action=edit
Test file

LLVM is producing unusual code for ADCX and ADOX.

The source file below (zboson.cxx) is modified from an earlier example (the
link to the original was dropped from this archived copy of the report).

All the tests were run on the same machine. The machine is a 6th gen Core i5
(Skylake). It includes ADX cpu features.


$ cat zboson.cxx
#include <x86intrin.h>
#include <stdint.h>

#define LEN 4  // N = N*64-bit add e.g. 4=256-bit add, 3=192-bit add, ...

static unsigned char c = 0;

template<int START, int N>
struct Repeat {
    static void add (uint64_t *x, uint64_t *y) {
#if defined(__INTEL_COMPILER)
        const uint64_t* a = x;
        uint64_t* b = y;
#else
        const long long unsigned int* a = reinterpret_cast<const long long unsigned int*>(x);
        long long unsigned int* b = reinterpret_cast<long long unsigned int*>(y);
#endif
        c = _addcarryx_u64(c, a[START], b[START], &b[START]);
        Repeat<START+1, N>::add(x,y);
    }
};

template<int N>
    struct Repeat<LEN, N> {
    static void add (uint64_t *x, uint64_t *y) {}
};

void sum_unroll(uint64_t *x, uint64_t *y) {
    Repeat<0,LEN>::add(x,y);
}


$ clang++ -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o

zboson.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <_Z10sum_unrollPmS_>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   8a 05 00 00 00 00       mov    0x0(%rip),%al        # a
   a:   48 8b 0f                mov    (%rdi),%rcx
   d:   04 ff                   add    $0xff,%al
   f:   66 48 0f 38 f6 0e       adcx   (%rsi),%rcx
  15:   48 89 0e                mov    %rcx,(%rsi)
  18:   50                      push   %rax
  19:   48 89 f8                mov    %rdi,%rax
  1c:   04 7f                   add    $0x7f,%al
  1e:   9e                      sahf
  1f:   58                      pop    %rax
  20:   0f 92 c0                setb   %al
  23:   48 8b 4f 08             mov    0x8(%rdi),%rcx
  27:   04 ff                   add    $0xff,%al
  29:   66 48 0f 38 f6 4e 08    adcx   0x8(%rsi),%rcx
  30:   48 89 4e 08             mov    %rcx,0x8(%rsi)
  34:   50                      push   %rax
  35:   48 89 f8                mov    %rdi,%rax
  38:   04 7f                   add    $0x7f,%al
  3a:   9e                      sahf
  3b:   58                      pop    %rax
  3c:   0f 92 c0                setb   %al
  3f:   48 8b 4f 10             mov    0x10(%rdi),%rcx
  43:   04 ff                   add    $0xff,%al
  45:   66 48 0f 38 f6 4e 10    adcx   0x10(%rsi),%rcx
  4c:   48 89 4e 10             mov    %rcx,0x10(%rsi)
  50:   50                      push   %rax
  51:   48 89 f8                mov    %rdi,%rax
  54:   04 7f                   add    $0x7f,%al
  56:   9e                      sahf
  57:   58                      pop    %rax
  58:   0f 92 c0                setb   %al
  5b:   48 8b 4f 18             mov    0x18(%rdi),%rcx
  5f:   04 ff                   add    $0xff,%al
  61:   66 48 0f 38 f6 4e 18    adcx   0x18(%rsi),%rcx
  68:   48 89 4e 18             mov    %rcx,0x18(%rsi)
  6c:   50                      push   %rax
  6d:   48 89 f8                mov    %rdi,%rax
  70:   04 7f                   add    $0x7f,%al
  72:   9e                      sahf
  73:   58                      pop    %rax
  74:   0f 92 c0                setb   %al
  77:   88 05 00 00 00 00       mov    %al,0x0(%rip)        # 7d
  7d:   5d                      pop    %rbp
  7e:   c3                      retq


$ icc -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o

zboson.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <_Z10sum_unrollPmS_>:
   0:   45 33 c9                xor    %r9d,%r9d
   3:   0f b6 05 00 00 00 00    movzbl 0x0(%rip),%eax        # a
   a:   44 3b c8                cmp    %eax,%r9d
   d:   48 8b 17                mov    (%rdi),%rdx
  10:   66 48 0f 38 f6 16       adcx   (%rsi),%rdx
  16:   48 89 16                mov    %rdx,(%rsi)
  19:   48 8b 4f 08             mov    0x8(%rdi),%rcx
  1d:   66 48 0f 38 f6 4e 08    adcx   0x8(%rsi),%rcx
  24:   48 89 4e 08             mov    %rcx,0x8(%rsi)
  28:   4c 8b 47 10             mov    0x10(%rdi),%r8
  2c:   66 4c 0f 38 f6 46 10    adcx   0x10(%rsi),%r8
  33:   4c 89 46 10             mov    %r8,0x10(%rsi)
  37:   4c 8b 57 18             mov    0x18(%rdi),%r10
  3b:   66 4c 0f 38 f6 56 18    adcx   0x18(%rsi),%r10
  42:   4c 89 56 18             mov    %r10,0x18(%rsi)
  46:   41 0f 92 c1             setb   %r9b
  4a:   44 88 0d 00 00 00 00    mov    %r9b,0x0(%rip)        # 51
  51:   c3                      retq
  52:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)
  59:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)


$ g++ -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o

zboson.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <_Z10sum_unrollPmS_>:
   0:   0f b6 05 00 00 00 00    movzbl 0x0(%rip),%eax        # 7
   7:   04 ff                   add    $0xff,%al
   9:   48 8b 07                mov    (%rdi),%rax
   c:   48 13 06                adc    (%rsi),%rax
   f:   0f 92 c2                setb   %dl
  12:   48 89 06                mov    %rax,(%rsi)
  15:   48 8b 47 08             mov    0x8(%rdi),%rax
  19:   80 c2 ff                add    $0xff,%dl
  1c:   48 13 46 08             adc    0x8(%rsi),%rax
  20:   0f 92 c2                setb   %dl
  23:   48 89 46 08             mov    %rax,0x8(%rsi)
  27:   48 8b 47 10             mov    0x10(%rdi),%rax
  2b:   80 c2 ff                add    $0xff,%dl
  2e:   48 13 46 10             adc    0x10(%rsi),%rax
  32:   0f 92 c2                setb   %dl
  35:   48 89 46 10             mov    %rax,0x10(%rsi)
  39:   48 8b 47 18             mov    0x18(%rdi),%rax
  3d:   80 c2 ff                add    $0xff,%dl
  40:   48 13 46 18             adc    0x18(%rsi),%rax
  44:   48 89 46 18             mov    %rax,0x18(%rsi)
  48:   0f 92 05 00 00 00 00    setb   0x0(%rip)        # 4f
  4f:   c3                      retq


Switching from _addcarryx_u64 to _addcarry_u64 produces the same unusual code:

    c = _addcarry_u64(c, a[START], b[START], &b[START]);
    Repeat<START+1, N>::add(x,y);

$ clang++ -g2 -O3 -march=native zboson.cxx -c
$ objdump --disassemble zboson.o

zboson.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <_Z10sum_unrollPmS_>:
   0:   55                      push   %rbp
   1:   48 89 e5                mov    %rsp,%rbp
   4:   8a 05 00 00 00 00       mov    0x0(%rip),%al        # a
   a:   48 8b 0f                mov    (%rdi),%rcx
   d:   04 ff                   add    $0xff,%al
   f:   66 48 0f 38 f6 0e       adcx   (%rsi),%rcx
  15:   48 89 0e                mov    %rcx,(%rsi)
  18:   50                      push   %rax
  19:   48 89 f8                mov    %rdi,%rax
  1c:   04 7f                   add    $0x7f,%al
  1e:   9e                      sahf
  1f:   58                      pop    %rax
  20:   0f 92 c0                setb   %al
  23:   48 8b 4f 08             mov    0x8(%rdi),%rcx
  27:   04 ff                   add    $0xff,%al
  29:   66 48 0f 38 f6 4e 08    adcx   0x8(%rsi),%rcx
  30:   48 89 4e 08             mov    %rcx,0x8(%rsi)
  34:   50                      push   %rax
  35:   48 89 f8                mov    %rdi,%rax
  38:   04 7f                   add    $0x7f,%al
  3a:   9e                      sahf
  3b:   58                      pop    %rax
  3c:   0f 92 c0                setb   %al
  3f:   48 8b 4f 10             mov    0x10(%rdi),%rcx
  43:   04 ff                   add    $0xff,%al
  45:   66 48 0f 38 f6 4e 10    adcx   0x10(%rsi),%rcx
  4c:   48 89 4e 10             mov    %rcx,0x10(%rsi)
  50:   50                      push   %rax
  51:   48 89 f8                mov    %rdi,%rax
  54:   04 7f                   add    $0x7f,%al
  56:   9e                      sahf
  57:   58                      pop    %rax
  58:   0f 92 c0                setb   %al
  5b:   48 8b 4f 18             mov    0x18(%rdi),%rcx
  5f:   04 ff                   add    $0xff,%al
  61:   66 48 0f 38 f6 4e 18    adcx   0x18(%rsi),%rcx
  68:   48 89 4e 18             mov    %rcx,0x18(%rsi)
  6c:   50                      push   %rax
  6d:   48 89 f8                mov    %rdi,%rax
  70:   04 7f                   add    $0x7f,%al
  72:   9e                      sahf
  73:   58                      pop    %rax
  74:   0f 92 c0                setb   %al
  77:   88 05 00 00 00 00       mov    %al,0x0(%rip)        # 7d
  7d:   5d                      pop    %rbp
  7e:   c3                      retq


$ clang++ --version
clang version 4.0.0 (tags/RELEASE_400/final)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /bin

$ g++ --version
g++ (GCC) 7.1.1 20170622 (Red Hat 7.1.1-3)
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

$ icc --version
icc (ICC) 17.0.4 20170411
Copyright (C) 1985-2017 Intel Corporation.  All rights reserved.

You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170823/7c85239d/attachment-0001.html>

More information about the llvm-bugs mailing list