[cfe-dev] Static linking a program

Wink Saville via cfe-dev cfe-dev at lists.llvm.org
Wed Jul 4 15:20:13 PDT 2018


Executive summary: In _dl_get_origin, lld links the call to mempcpy to what
I'd call "thunking code": it returns in rax the address of the code that
should be called, instead of the address just past the last byte copied into
the destination buffer. Specifically, since rax is pointing at code, we seg
fault when a zero is stored through it to try to terminate the string.

I'd be glad to file a bug if you like.


Below are the steps I went through, starting by showing that
if you link with gold all is well:

$ PATH=/home/wink/prgs/llvm/dist/bin:$PATH clang++ -fuse-ld=gold -o
main-fuse-ld.gold -v -static -pthread main.cpp
clang version 7.0.0 (git at github.com:llvm-mirror/clang.git
833ad29a88de7d88bd69234b5069bbfaea0235d1)
(git at github.com:llvm-mirror/llvm
c55ef4741ac75872f0e692c87eac70745b3ce167)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/wink/prgs/llvm/dist/bin
Found candidate GCC installation: /usr/lib/gcc/x86_64-pc-linux-gnu/8.1.1
Found candidate GCC installation: /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1
Selected GCC installation: /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1
Candidate multilib: .;@m64
Candidate multilib: 32;@m32
Selected multilib: .;@m64
 "/home/wink/prgs/llvm/dist/bin/clang-7" -cc1 -triple
x86_64-unknown-linux-gnu -emit-obj -mrelax-all -disable-free
-disable-llvm-verifier -discard-value-names -main-file-name main.cpp
-static-define -mrelocation-model static -mthread-model posix
-mdisable-fp-elim -fmath-errno -masm-verbose -mconstructor-aliases
-munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info
-debugger-tuning=gdb -v -resource-dir
/home/wink/prgs/llvm/dist/lib/clang/7.0.0 -internal-isystem
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1
-internal-isystem
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/x86_64-pc-linux-gnu
-internal-isystem
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/backward
-internal-isystem /usr/local/include -internal-isystem
/home/wink/prgs/llvm/dist/lib/clang/7.0.0/include
-internal-externc-isystem /include -internal-externc-isystem
/usr/include -fdeprecated-macro -fdebug-compilation-dir
/home/wink/prgs/explore-cpp-static-linking -ferror-limit 19
-fmessage-length 114 -pthread -fobjc-runtime=gcc -fcxx-exceptions
-fexceptions -fdiagnostics-show-option -fcolor-diagnostics -o
/tmp/main-30c2dc.o -x c++ main.cpp
clang -cc1 version 7.0.0 based upon LLVM 7.0.0svn default target
x86_64-unknown-linux-gnu
ignoring nonexistent directory "/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1
 /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/x86_64-pc-linux-gnu
 /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/backward
 /usr/local/include
 /home/wink/prgs/llvm/dist/lib/clang/7.0.0/include
 /usr/include
End of search list.
 "/usr/bin/ld.gold" --eh-frame-hdr -m elf_x86_64 -static -o
main-fuse-ld.gold
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64/crt1.o
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64/crti.o
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/crtbeginT.o
-L/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1
-L/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64
-L/lib/../lib64 -L/usr/lib/../lib64
-L/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../..
-L/home/wink/prgs/llvm/dist/bin/../lib -L/lib -L/usr/lib
/tmp/main-30c2dc.o -lstdc++ -lm --start-group -lgcc -lgcc_eh -lpthread
-lc --end-group /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/crtend.o
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64/crtn.o

$ ./main-fuse-ld.gold 4 5 6
argv[0]: ./main-fuse-ld.gold
argv[1]: 4
argv[2]: 5
argv[3]: 6


Here, using ld.lld, we see the failure:

$ PATH=/home/wink/prgs/llvm/dist/bin:$PATH clang++ -fuse-ld=lld -o
main-fuse-ld.lld -v -static -pthread main.cpp
clang version 7.0.0 (git at github.com:llvm-mirror/clang.git
833ad29a88de7d88bd69234b5069bbfaea0235d1)
(git at github.com:llvm-mirror/llvm
c55ef4741ac75872f0e692c87eac70745b3ce167)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/wink/prgs/llvm/dist/bin
Found candidate GCC installation: /usr/lib/gcc/x86_64-pc-linux-gnu/8.1.1
Found candidate GCC installation: /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1
Selected GCC installation: /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1
Candidate multilib: .;@m64
Candidate multilib: 32;@m32
Selected multilib: .;@m64
 "/home/wink/prgs/llvm/dist/bin/clang-7" -cc1 -triple
x86_64-unknown-linux-gnu -emit-obj -mrelax-all -disable-free
-disable-llvm-verifier -discard-value-names -main-file-name main.cpp
-static-define -mrelocation-model static -mthread-model posix
-mdisable-fp-elim -fmath-errno -masm-verbose -mconstructor-aliases
-munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info
-debugger-tuning=gdb -v -resource-dir
/home/wink/prgs/llvm/dist/lib/clang/7.0.0 -internal-isystem
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1
-internal-isystem
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/x86_64-pc-linux-gnu
-internal-isystem
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/backward
-internal-isystem /usr/local/include -internal-isystem
/home/wink/prgs/llvm/dist/lib/clang/7.0.0/include
-internal-externc-isystem /include -internal-externc-isystem
/usr/include -fdeprecated-macro -fdebug-compilation-dir
/home/wink/prgs/explore-cpp-static-linking -ferror-limit 19
-fmessage-length 114 -pthread -fobjc-runtime=gcc -fcxx-exceptions
-fexceptions -fdiagnostics-show-option -fcolor-diagnostics -o
/tmp/main-91a03f.o -x c++ main.cpp
clang -cc1 version 7.0.0 based upon LLVM 7.0.0svn default target
x86_64-unknown-linux-gnu
ignoring nonexistent directory "/include"
#include "..." search starts here:
#include <...> search starts here:
 /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1
 /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/x86_64-pc-linux-gnu
 /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../include/c++/8.1.1/backward
 /usr/local/include
 /home/wink/prgs/llvm/dist/lib/clang/7.0.0/include
 /usr/include
End of search list.
 "/home/wink/prgs/llvm/dist/bin/ld.lld" --eh-frame-hdr -m elf_x86_64
-static -o main-fuse-ld.lld
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64/crt1.o
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64/crti.o
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/crtbeginT.o
-L/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1
-L/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64
-L/lib/../lib64 -L/usr/lib/../lib64
-L/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../..
-L/home/wink/prgs/llvm/dist/bin/../lib -L/lib -L/usr/lib
/tmp/main-91a03f.o -lstdc++ -lm --start-group -lgcc -lgcc_eh -lpthread
-lc --end-group /usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/crtend.o
/usr/lib64/gcc/x86_64-pc-linux-gnu/8.1.1/../../../../lib64/crtn.o

$ ./main-fuse-ld.lld 7 8 9
Segmentation fault (core dumped)

$ coredumpctl gdb
           PID: 17791 (main-fuse-ld.ll)
           UID: 1000 (wink)
           GID: 100 (users)
        Signal: 11 (SEGV)
     Timestamp: Wed 2018-07-04 15:09:12 PDT (9s ago)
  Command Line: ./main-fuse-ld.lld 7 8 9
    Executable: /home/wink/prgs/explore-cpp-static-linking/main-fuse-ld.lld
 Control Group: /user.slice/user-1000.slice/session-c2.scope
          Unit: session-c2.scope
         Slice: user-1000.slice
       Session: c2
     Owner UID: 1000 (wink)
       Boot ID: a39916a44af64ce3836e760848efaaac
    Machine ID: 8f80fd742eae4659baed812cd07a9439
      Hostname: wink-desktop
       Storage:
/var/lib/systemd/coredump/core.main-fuse-ld\x2ell.1000.a39916a44af64ce3836e760848efaaac.17791.1530742152000000.lz4
       Message: Process 17791 (main-fuse-ld.ll) of user 1000 dumped core.

                Stack trace of thread 17791:
                #0  0x000000000038c266 n/a
(/home/wink/prgs/explore-cpp-static-linking/main-fuse-ld.lld)

GNU gdb (GDB) 8.1
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from
/home/wink/prgs/explore-cpp-static-linking/main-fuse-ld.lld...done.

warning: core file may not match specified executable file.
[New LWP 17791]
Core was generated by `./main-fuse-ld.lld 7 8 9'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x000000000038c266 in _dl_get_origin ()
(gdb) bt
#0  0x000000000038c266 in _dl_get_origin ()
#1  0x000000000038b8cf in _dl_non_dynamic_init ()
#2  0x00000000002f7151 in __libc_init_first ()
#3  0x00000000002f6e47 in __libc_start_main ()
#4  0x000000000025b02a in _start ()


Since we're seg faulting in _dl_get_origin I cloned glibc and
found the code in glibc/sysdeps/unix/sysv/linux/dl-origin.c. Here
is the routine from that file. Stepping through the code I've determined
that the code is faulting after returning from the first __mempcpy which
I've marked with "<<<< Faulting storing the 0" below:

const char *
_dl_get_origin (void)
{
  char linkval[PATH_MAX];
  char *result;
  int len;
  INTERNAL_SYSCALL_DECL (err);

  len = INTERNAL_SYSCALL (readlink, err, 3, "/proc/self/exe", linkval,
  sizeof (linkval));
  if (! INTERNAL_SYSCALL_ERROR_P (len, err) && len > 0 && linkval[0] != '[')
    {
      /* We can use this value.  */
      assert (linkval[0] == '/');
      while (len > 1 && linkval[len - 1] != '/')
        --len;
      result = (char *) malloc (len + 1);
      if (result == NULL)
         result = (char *) -1;
      else if (len == 1)
         memcpy (result, "/", 2);
      else
         *((char *) __mempcpy (result, linkval, len - 1)) = '\0';
<<<< Faulting storing the 0
    }
  else
    {
      result = (char *) -1;
      /* We use the environment variable LD_ORIGIN_PATH.  If it is set make
         a copy and strip out trailing slashes.  */
      if (GLRO(dl_origin_path) != NULL)
      {
        size_t len = strlen (GLRO(dl_origin_path));
        result = (char *) malloc (len + 1);
        if (result == NULL)
         result = (char *) -1;
        else
         {
            char *cp = __mempcpy (result, GLRO(dl_origin_path), len);
            while (cp > result + 1 && cp[-1] == '/')
              --cp;
            *cp = '\0';
         }
       }
    }

  return result;
}


Here is the objdump disassembly of _dl_get_origin. At 38c266 is the store
that follows the call to __mempcpy; this store is the instruction that is
failing:

  38c266: c6 00 00              mov    BYTE PTR [rax],0x0     <<<<
Faulting storing the 0

000000000038c110 <_dl_get_origin>:
  38c110: 41 54                push   r12
  38c112: ba 00 10 00 00        mov    edx,0x1000
  38c117: b8 59 00 00 00        mov    eax,0x59
  38c11c: 48 8d 3d e3 47 e7 ff lea    rdi,[rip+0xffffffffffe747e3]
   # 200906 <__PRETTY_FUNCTION__.13205+0xa0>
  38c123: 55                    push   rbp
  38c124: 53                    push   rbx
  38c125: 48 81 ec 00 10 00 00 sub    rsp,0x1000
  38c12c: 48 89 e3              mov    rbx,rsp
  38c12f: 48 89 de              mov    rsi,rbx
  38c132: 0f 05                syscall
  38c134: 3d 00 f0 ff ff        cmp    eax,0xfffff000
  38c139: 77 75                ja     38c1b0 <_dl_get_origin+0xa0>
  38c13b: 85 c0                test   eax,eax
  38c13d: 7e 71                jle    38c1b0 <_dl_get_origin+0xa0>
  38c13f: 0f b6 14 24          movzx  edx,BYTE PTR [rsp]
  38c143: 80 fa 5b              cmp    dl,0x5b
  38c146: 74 68                je     38c1b0 <_dl_get_origin+0xa0>
  38c148: 80 fa 2f              cmp    dl,0x2f
  38c14b: 0f 85 27 01 00 00    jne    38c278 <_dl_get_origin+0x168>
  38c151: 83 f8 01              cmp    eax,0x1
  38c154: 74 2f                je     38c185 <_dl_get_origin+0x75>
  38c156: 89 c5                mov    ebp,eax
  38c158: 83 e8 01              sub    eax,0x1
  38c15b: 48 98                cdqe
  38c15d: 80 3c 04 2f          cmp    BYTE PTR [rsp+rax*1],0x2f
  38c161: 75 1b                jne    38c17e <_dl_get_origin+0x6e>
  38c163: e9 d8 00 00 00        jmp    38c240 <_dl_get_origin+0x130>
  38c168: 0f 1f 84 00 00 00 00 nop    DWORD PTR [rax+rax*1+0x0]
  38c16f: 00
  38c170: 48 83 e8 01          sub    rax,0x1
  38c174: 80 3c 03 2f          cmp    BYTE PTR [rbx+rax*1],0x2f
  38c178: 0f 84 c2 00 00 00    je     38c240 <_dl_get_origin+0x130>
  38c17e: 89 c5                mov    ebp,eax
  38c180: 83 f8 01              cmp    eax,0x1
  38c183: 75 eb                jne    38c170 <_dl_get_origin+0x60>
  38c185: bf 02 00 00 00        mov    edi,0x2
  38c18a: 67 e8 90 a3 fa ff    addr32 call 336520 <__malloc>
  38c190: 49 89 c4              mov    r12,rax
  38c193: 48 85 c0              test   rax,rax
  38c196: 0f 84 84 00 00 00    je     38c220 <_dl_get_origin+0x110>
  38c19c: b8 2f 00 00 00        mov    eax,0x2f
  38c1a1: 66 41 89 04 24        mov    WORD PTR [r12],ax
  38c1a6: eb 7f                jmp    38c227 <_dl_get_origin+0x117>
  38c1a8: 0f 1f 84 00 00 00 00 nop    DWORD PTR [rax+rax*1+0x0]
  38c1af: 00
  38c1b0: 48 8b 1d f1 04 02 00 mov    rbx,QWORD PTR [rip+0x204f1]
  # 3ac6a8 <_dl_origin_path>
  38c1b7: 48 85 db              test   rbx,rbx
  38c1ba: 74 64                je     38c220 <_dl_get_origin+0x110>
  38c1bc: 48 89 df              mov    rdi,rbx
  38c1bf: e8 4c 1e 01 00        call   39e010 <arena_thread_freeres+0x140>
  38c1c4: 48 89 c5              mov    rbp,rax
  38c1c7: 48 8d 78 01          lea    rdi,[rax+0x1]
  38c1cb: 67 e8 4f a3 fa ff    addr32 call 336520 <__malloc>
  38c1d1: 49 89 c4              mov    r12,rax
  38c1d4: 48 85 c0              test   rax,rax
  38c1d7: 74 47                je     38c220 <_dl_get_origin+0x110>
  38c1d9: 48 89 ea              mov    rdx,rbp
  38c1dc: 48 89 de              mov    rsi,rbx
  38c1df: 48 89 c7              mov    rdi,rax
  38c1e2: ff 15 c0 f2 01 00    call   QWORD PTR [rip+0x1f2c0]        #
3ab4a8 <__libc_enable_secure+0x184>
  38c1e8: 49 8d 54 24 01        lea    rdx,[r12+0x1]
  38c1ed: 48 39 c2              cmp    rdx,rax
  38c1f0: 72 0f                jb     38c201 <_dl_get_origin+0xf1>
  38c1f2: eb 13                jmp    38c207 <_dl_get_origin+0xf7>
  38c1f4: 0f 1f 40 00          nop    DWORD PTR [rax+0x0]
  38c1f8: 48 83 e8 01          sub    rax,0x1
  38c1fc: 48 39 c2              cmp    rdx,rax
  38c1ff: 74 06                je     38c207 <_dl_get_origin+0xf7>
  38c201: 80 78 ff 2f          cmp    BYTE PTR [rax-0x1],0x2f
  38c205: 74 f1                je     38c1f8 <_dl_get_origin+0xe8>
  38c207: c6 00 00              mov    BYTE PTR [rax],0x0
  38c20a: 48 81 c4 00 10 00 00 add    rsp,0x1000
  38c211: 4c 89 e0              mov    rax,r12
  38c214: 5b                    pop    rbx
  38c215: 5d                    pop    rbp
  38c216: 41 5c                pop    r12
  38c218: c3                    ret
  38c219: 0f 1f 80 00 00 00 00 nop    DWORD PTR [rax+0x0]
  38c220: 49 c7 c4 ff ff ff ff mov    r12,0xffffffffffffffff
  38c227: 48 81 c4 00 10 00 00 add    rsp,0x1000
  38c22e: 4c 89 e0              mov    rax,r12
  38c231: 5b                    pop    rbx
  38c232: 5d                    pop    rbp
  38c233: 41 5c                pop    r12
  38c235: c3                    ret
  38c236: 66 2e 0f 1f 84 00 00 nop    WORD PTR cs:[rax+rax*1+0x0]
  38c23d: 00 00 00
  38c240: 8d 7d 01              lea    edi,[rbp+0x1]
  38c243: 48 63 ff              movsxd rdi,edi
  38c246: 67 e8 d4 a2 fa ff    addr32 call 336520 <__malloc>
  38c24c: 49 89 c4              mov    r12,rax
  38c24f: 48 85 c0              test   rax,rax
  38c252: 74 cc                je     38c220 <_dl_get_origin+0x110>
  38c254: 8d 55 ff              lea    edx,[rbp-0x1]
  38c257: 48 89 de              mov    rsi,rbx
  38c25a: 4c 89 e7              mov    rdi,r12
  38c25d: 48 63 d2              movsxd rdx,edx
  38c260: ff 15 42 f2 01 00    call   QWORD PTR [rip+0x1f242]        #
3ab4a8 <__libc_enable_secure+0x184>
  38c266: c6 00 00              mov    BYTE PTR [rax],0x0     <<<<
Faulting storing the 0
  38c269: 48 81 c4 00 10 00 00 add    rsp,0x1000
  38c270: 4c 89 e0              mov    rax,r12
  38c273: 5b                    pop    rbx
  38c274: 5d                    pop    rbp
  38c275: 41 5c                pop    r12
  38c277: c3                    ret
  38c278: 48 8d 0d d9 7f e7 ff lea    rcx,[rip+0xffffffffffe77fd9]
   # 204258 <__PRETTY_FUNCTION__.10083>
  38c27f: ba 2f 00 00 00        mov    edx,0x2f
  38c284: 48 8d 35 e5 80 e7 ff lea    rsi,[rip+0xffffffffffe780e5]
   # 204370 <__PRETTY_FUNCTION__.9073+0x10>
  38c28b: 48 8d 3d 7c 67 e7 ff lea    rdi,[rip+0xffffffffffe7677c]
   # 202a0e <null+0x461>
  38c292: e8 59 68 f7 ff        call   302af0 <__assert_fail>
  38c297: cc                    int3
  38c298: cc                    int3
  38c299: cc                    int3
  38c29a: cc                    int3
  38c29b: cc                    int3
  38c29c: cc                    int3
  38c29d: cc                    int3
  38c29e: cc                    int3
  38c29f: cc                    int3


Now I'll go through executing main-fuse-ld.lld using gdb. I've added comments
below introduced with ">>>":

$ gdb --args ./main-fuse-ld.lld
GNU gdb (GDB) 8.1
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ./main-fuse-ld.lld...done.

>>> Set a breakpoint at the call to "__mempcpy" at 0x38c260, then run
>>> and display the instruction where we stopped:

(gdb) break *0x38c260
Breakpoint 1 at 0x38c260
(gdb) r
Starting program: /home/wink/prgs/explore-cpp-static-linking/main-fuse-ld.lld

Breakpoint 1, 0x000000000038c260 in _dl_get_origin ()
(gdb) display/i $pc
2: x/i $pc
=> 0x38c260 <_dl_get_origin+336>: call   QWORD PTR [rip+0x1f242]
 # 0x3ab4a8

>>> Display the registers. rsi is the source, rdi is the destination
>>> and rdx is the count

(gdb) i r
rax            0x3b6460 3892320
rbx            0x7fffffffcd70 140737488342384
rcx            0x3b6460 3892320
rdx            0x2a 42
rsi            0x7fffffffcd70 140737488342384
rdi            0x3b6460 3892320
rbp            0x2b 0x2b
rsp            0x7fffffffcd70 0x7fffffffcd70
r8             0x3 3
r9             0x0 0
r10            0xfffffffffffff000 -4096
r11            0x0 0
r12            0x3b6460 3892320
r13            0x0 0
r14            0x3a1510 3806480
r15            0x0 0
rip            0x38c260 0x38c260 <_dl_get_origin+336>
eflags         0x206 [ PF IF ]
cs             0x33 51
ss             0x2b 43
ds             0x0 0
es             0x0 0
fs             0x0 0
gs             0x0 0

>>> Dump the source string

(gdb) x/s $rsi
0x7fffffffcd70: "/home/wink/prgs/explore-cpp-static-linking/main-fuse-ld.lld"

>>> Dump 43 bytes of the destination

(gdb) x/43x $rdi
0x3b6460: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x3b6468: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x3b6470: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x3b6478: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x3b6480: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x3b6488: 0x00 0x00 0x00

>>> Display the target of the "=> 0x38c260 <_dl_get_origin+336>: call   QWORD PTR [rip+0x1f242]        # 0x3ab4a8".
>>> Very oddly, the first thing it does is wipe out edx, which is the length:

(gdb) x/20i *0x3ab4a8
   0x339130 <mempcpy>: mov    edx,DWORD PTR [rip+0x7b316]        #
0x3b444c <_dl_x86_cpu_features+76>
   0x339136 <mempcpy+6>: lea    rax,[rip+0x337d3]        # 0x36c910
<__mempcpy_erms>
   0x33913d <mempcpy+13>: test   edx,0x80000
   0x339143 <mempcpy+19>: jne    0x33919e <mempcpy+110>
   0x339145 <mempcpy+21>: mov    eax,edx
   0x339147 <mempcpy+23>: and    eax,0x101000
   0x33914c <mempcpy+28>: cmp    eax,0x1000
   0x339151 <mempcpy+33>: je     0x3391a0 <mempcpy+112>
   0x339153 <mempcpy+35>: test   dh,0x8
   0x339156 <mempcpy+38>: je     0x339178 <mempcpy+72>
   0x339158 <mempcpy+40>: test   BYTE PTR [rip+0x7b2be],0x2        #
0x3b441d <_dl_x86_cpu_features+29>
   0x33915f <mempcpy+47>: lea    rdx,[rip+0x33b0a]        # 0x36cc70
<__mempcpy_avx_unaligned>
   0x339166 <mempcpy+54>: lea    rax,[rip+0x33b43]        # 0x36ccb0
<__mempcpy_avx_unaligned_erms>
   0x33916d <mempcpy+61>: cmove  rax,rdx
   0x339171 <mempcpy+65>: ret
   0x339172 <mempcpy+66>: nop    WORD PTR [rax+rax*1+0x0]
   0x339178 <mempcpy+72>: test   BYTE PTR [rip+0x7b292],0x2        #
0x3b4411 <_dl_x86_cpu_features+17>
   0x33917f <mempcpy+79>: je     0x3391d0 <mempcpy+160>
   0x339181 <mempcpy+81>: test   edx,0x40000
   0x339187 <mempcpy+87>: jne    0x3391d0 <mempcpy+160>

>>> Now step through mempcpy

(gdb) si
0x0000000000339130 in mempcpy ()
2: x/i $pc
=> 0x339130 <mempcpy>: mov    edx,DWORD PTR [rip+0x7b316]        #
0x3b444c <_dl_x86_cpu_features+76>
(gdb)
0x0000000000339136 in mempcpy ()
2: x/i $pc
=> 0x339136 <mempcpy+6>: lea    rax,[rip+0x337d3]        # 0x36c910
<__mempcpy_erms>
(gdb)
0x000000000033913d in mempcpy ()
2: x/i $pc
=> 0x33913d <mempcpy+13>: test   edx,0x80000
(gdb)
0x0000000000339143 in mempcpy ()
2: x/i $pc
=> 0x339143 <mempcpy+19>: jne    0x33919e <mempcpy+110>
(gdb)
0x0000000000339145 in mempcpy ()
2: x/i $pc
=> 0x339145 <mempcpy+21>: mov    eax,edx
(gdb)
0x0000000000339147 in mempcpy ()
2: x/i $pc
=> 0x339147 <mempcpy+23>: and    eax,0x101000
(gdb)
0x000000000033914c in mempcpy ()
2: x/i $pc
=> 0x33914c <mempcpy+28>: cmp    eax,0x1000
(gdb)
0x0000000000339151 in mempcpy ()
2: x/i $pc
=> 0x339151 <mempcpy+33>: je     0x3391a0 <mempcpy+112>
(gdb)
0x0000000000339153 in mempcpy ()
2: x/i $pc
=> 0x339153 <mempcpy+35>: test   dh,0x8
(gdb)
0x0000000000339156 in mempcpy ()
2: x/i $pc
=> 0x339156 <mempcpy+38>: je     0x339178 <mempcpy+72>
(gdb)
0x0000000000339158 in mempcpy ()
2: x/i $pc
=> 0x339158 <mempcpy+40>: test   BYTE PTR [rip+0x7b2be],0x2        #
0x3b441d <_dl_x86_cpu_features+29>
(gdb)
0x000000000033915f in mempcpy ()
2: x/i $pc
=> 0x33915f <mempcpy+47>: lea    rdx,[rip+0x33b0a]        # 0x36cc70
<__mempcpy_avx_unaligned>
(gdb)
0x0000000000339166 in mempcpy ()
2: x/i $pc
=> 0x339166 <mempcpy+54>: lea    rax,[rip+0x33b43]        # 0x36ccb0
<__mempcpy_avx_unaligned_erms>
(gdb)
0x000000000033916d in mempcpy ()
2: x/i $pc
=> 0x33916d <mempcpy+61>: cmove  rax,rdx
(gdb)
0x0000000000339171 in mempcpy ()
2: x/i $pc
=> 0x339171 <mempcpy+65>: ret
(gdb)
0x000000000038c266 in _dl_get_origin ()
2: x/i $pc
=> 0x38c266 <_dl_get_origin+342>: mov    BYTE PTR [rax],0x0


>>> Now we've returned to _dl_get_origin so display the registers
>>> and we see a "bad" address in rax:

(gdb) i r
rax            0x36ccb0 3591344
rbx            0x7fffffffcd70 140737488342384
rcx            0x3b6460 3892320
rdx            0x36cc70 3591280
rsi            0x7fffffffcd70 140737488342384
rdi            0x3b6460 3892320
rbp            0x2b 0x2b
rsp            0x7fffffffcd70 0x7fffffffcd70
r8             0x3 3
r9             0x0 0
r10            0xfffffffffffff000 -4096
r11            0x0 0
r12            0x3b6460 3892320
r13            0x0 0
r14            0x3a1510 3806480
r15            0x0 0
rip            0x38c266 0x38c266 <_dl_get_origin+342>
eflags         0x202 [ IF ]
cs             0x33 51
ss             0x2b 43
ds             0x0 0
es             0x0 0
fs             0x0 0
gs             0x0 0


>>> "step instruction" and we seg fault

(gdb) si

Program received signal SIGSEGV, Segmentation fault.
0x000000000038c266 in _dl_get_origin ()
2: x/i $pc
=> 0x38c266 <_dl_get_origin+342>: mov    BYTE PTR [rax],0x0

>>> So where is rax pointing? As it turns out, it's actually pointing
>>> at code, __mempcpy_avx_unaligned_erms, not data! This is
>>> actually the code that is called when you link with gold. So
>>> lld has not correctly linked up the code.

(gdb) x/i $rax
   0x36ccb0 <__mempcpy_avx_unaligned_erms>: mov    rax,rdi
(gdb) disassemble __mempcpy_avx_unaligned_erms
Dump of assembler code for function __mempcpy_avx_unaligned_erms:
   0x000000000036ccb0 <+0>: mov    rax,rdi
   0x000000000036ccb3 <+3>: add    rax,rdx
   0x000000000036ccb6 <+6>: jmp    0x36ccc3 <__memmove_avx_unaligned_erms+3>
End of assembler dump.
(gdb) disassemble 0x36ccc3
Dump of assembler code for function __memmove_avx_unaligned_erms:
   0x000000000036ccc0 <+0>: mov    rax,rdi
   0x000000000036ccc3 <+3>: cmp    rdx,0x20
   0x000000000036ccc7 <+7>: jb     0x36cd12 <__memmove_avx_unaligned_erms+82>
   0x000000000036ccc9 <+9>: cmp    rdx,0x40
   0x000000000036cccd <+13>: ja     0x36cd72 <__memmove_avx_unaligned_erms+178>
   0x000000000036ccd3 <+19>: vmovdqu ymm0,YMMWORD PTR [rsi]
   0x000000000036ccd7 <+23>: vmovdqu ymm1,YMMWORD PTR [rsi+rdx*1-0x20]
   0x000000000036ccdd <+29>: vmovdqu YMMWORD PTR [rdi],ymm0
   0x000000000036cce1 <+33>: vmovdqu YMMWORD PTR [rdi+rdx*1-0x20],ymm1
   0x000000000036cce7 <+39>: vzeroupper
   0x000000000036ccea <+42>: ret
   0x000000000036cceb <+43>: cmp    rdx,QWORD PTR [rip+0x3f896]
# 0x3ac588 <__x86_shared_non_temporal_threshold>
   0x000000000036ccf2 <+50>: jae    0x36ce1d <__memmove_avx_unaligned_erms+349>
   0x000000000036ccf8 <+56>: cmp    rdi,rsi
   0x000000000036ccfb <+59>: jb     0x36cd0c <__memmove_avx_unaligned_erms+76>
   0x000000000036ccfd <+61>: je     0x36cd11 <__memmove_avx_unaligned_erms+81>
   0x000000000036ccff <+63>: lea    r9,[rsi+rdx*1]
   0x000000000036cd03 <+67>: cmp    rdi,r9
   0x000000000036cd06 <+70>: jb     0x36ced1 <__memmove_avx_unaligned_erms+529>
   0x000000000036cd0c <+76>: mov    rcx,rdx
   0x000000000036cd0f <+79>: rep movs BYTE PTR es:[rdi],BYTE PTR ds:[rsi]
   0x000000000036cd11 <+81>: ret
   0x000000000036cd12 <+82>: cmp    dl,0x10
   0x000000000036cd15 <+85>: jae    0x36cd2e <__memmove_avx_unaligned_erms+110>
   0x000000000036cd17 <+87>: cmp    dl,0x8
   0x000000000036cd1a <+90>: jae    0x36cd43 <__memmove_avx_unaligned_erms+131>
   0x000000000036cd1c <+92>: cmp    dl,0x4
   0x000000000036cd1f <+95>: jae    0x36cd54 <__memmove_avx_unaligned_erms+148>
   0x000000000036cd21 <+97>: cmp    dl,0x1
   0x000000000036cd24 <+100>: ja     0x36cd61 <__memmove_avx_unaligned_erms+161>
   0x000000000036cd26 <+102>: jb     0x36cd2d <__memmove_avx_unaligned_erms+109>
   0x000000000036cd28 <+104>: movzx  ecx,BYTE PTR [rsi]
   0x000000000036cd2b <+107>: mov    BYTE PTR [rdi],cl
   0x000000000036cd2d <+109>: ret
   0x000000000036cd2e <+110>: vmovdqu xmm0,XMMWORD PTR [rsi]
   0x000000000036cd32 <+114>: vmovdqu xmm1,XMMWORD PTR [rsi+rdx*1-0x10]
   0x000000000036cd38 <+120>: vmovdqu XMMWORD PTR [rdi],xmm0
   0x000000000036cd3c <+124>: vmovdqu XMMWORD PTR [rdi+rdx*1-0x10],xmm1
   0x000000000036cd42 <+130>: ret
   0x000000000036cd43 <+131>: mov    rcx,QWORD PTR [rsi+rdx*1-0x8]
   0x000000000036cd48 <+136>: mov    rsi,QWORD PTR [rsi]
   0x000000000036cd4b <+139>: mov    QWORD PTR [rdi+rdx*1-0x8],rcx
   0x000000000036cd50 <+144>: mov    QWORD PTR [rdi],rsi
   0x000000000036cd53 <+147>: ret
   0x000000000036cd54 <+148>: mov    ecx,DWORD PTR [rsi+rdx*1-0x4]
   0x000000000036cd58 <+152>: mov    esi,DWORD PTR [rsi]
   0x000000000036cd5a <+154>: mov    DWORD PTR [rdi+rdx*1-0x4],ecx
   0x000000000036cd5e <+158>: mov    DWORD PTR [rdi],esi
   0x000000000036cd60 <+160>: ret
   0x000000000036cd61 <+161>: movzx  ecx,WORD PTR [rsi+rdx*1-0x2]
   0x000000000036cd66 <+166>: movzx  esi,WORD PTR [rsi]
   0x000000000036cd69 <+169>: mov    WORD PTR [rdi+rdx*1-0x2],cx
   0x000000000036cd6e <+174>: mov    WORD PTR [rdi],si
   0x000000000036cd71 <+177>: ret
   0x000000000036cd72 <+178>: cmp    rdx,0x1000
   0x000000000036cd79 <+185>: ja     0x36cceb <__memmove_avx_unaligned_erms+43>
   0x000000000036cd7f <+191>: cmp    rdx,0x100
   0x000000000036cd86 <+198>: ja     0x36ce1d <__memmove_avx_unaligned_erms+349>
   0x000000000036cd8c <+204>: cmp    rdx,0x80
   0x000000000036cd93 <+211>: jb     0x36cdef <__memmove_avx_unaligned_erms+303>
   0x000000000036cd95 <+213>: vmovdqu ymm0,YMMWORD PTR [rsi]
   0x000000000036cd99 <+217>: vmovdqu ymm1,YMMWORD PTR [rsi+0x20]
   0x000000000036cd9e <+222>: vmovdqu ymm2,YMMWORD PTR [rsi+0x40]
   0x000000000036cda3 <+227>: vmovdqu ymm3,YMMWORD PTR [rsi+0x60]
   0x000000000036cda8 <+232>: vmovdqu ymm4,YMMWORD PTR [rsi+rdx*1-0x20]
   0x000000000036cdae <+238>: vmovdqu ymm5,YMMWORD PTR [rsi+rdx*1-0x40]
   0x000000000036cdb4 <+244>: vmovdqu ymm6,YMMWORD PTR [rsi+rdx*1-0x60]
   0x000000000036cdba <+250>: vmovdqu ymm7,YMMWORD PTR [rsi+rdx*1-0x80]
   0x000000000036cdc0 <+256>: vmovdqu YMMWORD PTR [rdi],ymm0
   0x000000000036cdc4 <+260>: vmovdqu YMMWORD PTR [rdi+0x20],ymm1
   0x000000000036cdc9 <+265>: vmovdqu YMMWORD PTR [rdi+0x40],ymm2
   0x000000000036cdce <+270>: vmovdqu YMMWORD PTR [rdi+0x60],ymm3
   0x000000000036cdd3 <+275>: vmovdqu YMMWORD PTR [rdi+rdx*1-0x20],ymm4
   0x000000000036cdd9 <+281>: vmovdqu YMMWORD PTR [rdi+rdx*1-0x40],ymm5
   0x000000000036cddf <+287>: vmovdqu YMMWORD PTR [rdi+rdx*1-0x60],ymm6
   0x000000000036cde5 <+293>: vmovdqu YMMWORD PTR [rdi+rdx*1-0x80],ymm7
   0x000000000036cdeb <+299>: vzeroupper
   0x000000000036cdee <+302>: ret
   0x000000000036cdef <+303>: vmovdqu ymm0,YMMWORD PTR [rsi]
   0x000000000036cdf3 <+307>: vmovdqu ymm1,YMMWORD PTR [rsi+0x20]
   0x000000000036cdf8 <+312>: vmovdqu ymm2,YMMWORD PTR [rsi+rdx*1-0x20]
   0x000000000036cdfe <+318>: vmovdqu ymm3,YMMWORD PTR [rsi+rdx*1-0x40]
   0x000000000036ce04 <+324>: vmovdqu YMMWORD PTR [rdi],ymm0
   0x000000000036ce08 <+328>: vmovdqu YMMWORD PTR [rdi+0x20],ymm1
   0x000000000036ce0d <+333>: vmovdqu YMMWORD PTR [rdi+rdx*1-0x20],ymm2
   0x000000000036ce13 <+339>: vmovdqu YMMWORD PTR [rdi+rdx*1-0x40],ymm3
   0x000000000036ce19 <+345>: vzeroupper
   0x000000000036ce1c <+348>: ret
   0x000000000036ce1d <+349>: cmp    rdi,rsi
   0x000000000036ce20 <+352>: ja     0x36ced1 <__memmove_avx_unaligned_erms+529>
   0x000000000036ce26 <+358>: je     0x36cd11 <__memmove_avx_unaligned_erms+81>
   0x000000000036ce2c <+364>: vmovdqu ymm4,YMMWORD PTR [rsi]
   0x000000000036ce30 <+368>: vmovdqu ymm5,YMMWORD PTR [rsi+rdx*1-0x20]
   0x000000000036ce36 <+374>: vmovdqu ymm6,YMMWORD PTR [rsi+rdx*1-0x40]
   0x000000000036ce3c <+380>: vmovdqu ymm7,YMMWORD PTR [rsi+rdx*1-0x60]
   0x000000000036ce42 <+386>: vmovdqu ymm8,YMMWORD PTR [rsi+rdx*1-0x80]
   0x000000000036ce48 <+392>: mov    r11,rdi
   0x000000000036ce4b <+395>: lea    rcx,[rdi+rdx*1-0x20]
   0x000000000036ce50 <+400>: mov    r8,rdi
   0x000000000036ce53 <+403>: and    r8,0x1f
   0x000000000036ce57 <+407>: sub    r8,0x20
   0x000000000036ce5b <+411>: sub    rsi,r8
   0x000000000036ce5e <+414>: sub    rdi,r8
   0x000000000036ce61 <+417>: add    rdx,r8
   0x000000000036ce64 <+420>: cmp    rdx,QWORD PTR [rip+0x3f71d]
 # 0x3ac588 <__x86_shared_non_temporal_threshold>
   0x000000000036ce6b <+427>: ja     0x36cf78 <__memmove_avx_unaligned_erms+696>
   0x000000000036ce71 <+433>: vmovdqu ymm0,YMMWORD PTR [rsi]
   0x000000000036ce75 <+437>: vmovdqu ymm1,YMMWORD PTR [rsi+0x20]
   0x000000000036ce7a <+442>: vmovdqu ymm2,YMMWORD PTR [rsi+0x40]
   0x000000000036ce7f <+447>: vmovdqu ymm3,YMMWORD PTR [rsi+0x60]
   0x000000000036ce84 <+452>: add    rsi,0x80
   0x000000000036ce8b <+459>: sub    rdx,0x80
   0x000000000036ce92 <+466>: vmovdqa YMMWORD PTR [rdi],ymm0
   0x000000000036ce96 <+470>: vmovdqa YMMWORD PTR [rdi+0x20],ymm1
   0x000000000036ce9b <+475>: vmovdqa YMMWORD PTR [rdi+0x40],ymm2
   0x000000000036cea0 <+480>: vmovdqa YMMWORD PTR [rdi+0x60],ymm3
   0x000000000036cea5 <+485>: add    rdi,0x80
   0x000000000036ceac <+492>: cmp    rdx,0x80
   0x000000000036ceb3 <+499>: ja     0x36ce71 <__memmove_avx_unaligned_erms+433>
   0x000000000036ceb5 <+501>: vmovdqu YMMWORD PTR [rcx],ymm5
   0x000000000036ceb9 <+505>: vmovdqu YMMWORD PTR [rcx-0x20],ymm6
   0x000000000036cebe <+510>: vmovdqu YMMWORD PTR [rcx-0x40],ymm7
   0x000000000036cec3 <+515>: vmovdqu YMMWORD PTR [rcx-0x60],ymm8
   0x000000000036cec8 <+520>: vmovdqu YMMWORD PTR [r11],ymm4
   0x000000000036cecd <+525>: vzeroupper
   0x000000000036ced0 <+528>: ret
   0x000000000036ced1 <+529>: vmovdqu ymm4,YMMWORD PTR [rsi]
   0x000000000036ced5 <+533>: vmovdqu ymm5,YMMWORD PTR [rsi+0x20]
   0x000000000036ceda <+538>: vmovdqu ymm6,YMMWORD PTR [rsi+0x40]
   0x000000000036cedf <+543>: vmovdqu ymm7,YMMWORD PTR [rsi+0x60]
   0x000000000036cee4 <+548>: vmovdqu ymm8,YMMWORD PTR [rsi+rdx*1-0x20]
   0x000000000036ceea <+554>: lea    r11,[rdi+rdx*1-0x20]
   0x000000000036ceef <+559>: lea    rcx,[rsi+rdx*1-0x20]
   0x000000000036cef4 <+564>: mov    r9,r11
   0x000000000036cef7 <+567>: mov    r8,r11
   0x000000000036cefa <+570>: and    r8,0x1f
   0x000000000036cefe <+574>: sub    rcx,r8
   0x000000000036cf01 <+577>: sub    r9,r8
   0x000000000036cf04 <+580>: sub    rdx,r8
   0x000000000036cf07 <+583>: cmp    rdx,QWORD PTR [rip+0x3f67a]        # 0x3ac588 <__x86_shared_non_temporal_threshold>
   0x000000000036cf0e <+590>: ja     0x36d004 <__memmove_avx_unaligned_erms+836>
   0x000000000036cf14 <+596>: vmovdqu ymm0,YMMWORD PTR [rcx]
   0x000000000036cf18 <+600>: vmovdqu ymm1,YMMWORD PTR [rcx-0x20]
   0x000000000036cf1d <+605>: vmovdqu ymm2,YMMWORD PTR [rcx-0x40]
   0x000000000036cf22 <+610>: vmovdqu ymm3,YMMWORD PTR [rcx-0x60]
   0x000000000036cf27 <+615>: sub    rcx,0x80
   0x000000000036cf2e <+622>: sub    rdx,0x80
   0x000000000036cf35 <+629>: vmovdqa YMMWORD PTR [r9],ymm0
   0x000000000036cf3a <+634>: vmovdqa YMMWORD PTR [r9-0x20],ymm1
   0x000000000036cf40 <+640>: vmovdqa YMMWORD PTR [r9-0x40],ymm2
   0x000000000036cf46 <+646>: vmovdqa YMMWORD PTR [r9-0x60],ymm3
   0x000000000036cf4c <+652>: sub    r9,0x80
   0x000000000036cf53 <+659>: cmp    rdx,0x80
   0x000000000036cf5a <+666>: ja     0x36cf14 <__memmove_avx_unaligned_erms+596>
   0x000000000036cf5c <+668>: vmovdqu YMMWORD PTR [rdi],ymm4
   0x000000000036cf60 <+672>: vmovdqu YMMWORD PTR [rdi+0x20],ymm5
   0x000000000036cf65 <+677>: vmovdqu YMMWORD PTR [rdi+0x40],ymm6
   0x000000000036cf6a <+682>: vmovdqu YMMWORD PTR [rdi+0x60],ymm7
   0x000000000036cf6f <+687>: vmovdqu YMMWORD PTR [r11],ymm8
   0x000000000036cf74 <+692>: vzeroupper
   0x000000000036cf77 <+695>: ret
   0x000000000036cf78 <+696>: lea    r10,[rdi+rdx*1]
   0x000000000036cf7c <+700>: cmp    rsi,r10
   0x000000000036cf7f <+703>: jb     0x36ce71 <__memmove_avx_unaligned_erms+433>
   0x000000000036cf85 <+709>: prefetcht0 BYTE PTR [rsi+0x100]
   0x000000000036cf8c <+716>: prefetcht0 BYTE PTR [rsi+0x140]
   0x000000000036cf93 <+723>: prefetcht0 BYTE PTR [rsi+0x180]
   0x000000000036cf9a <+730>: prefetcht0 BYTE PTR [rsi+0x1c0]
   0x000000000036cfa1 <+737>: vmovdqu ymm0,YMMWORD PTR [rsi]
   0x000000000036cfa5 <+741>: vmovdqu ymm1,YMMWORD PTR [rsi+0x20]
   0x000000000036cfaa <+746>: vmovdqu ymm2,YMMWORD PTR [rsi+0x40]
   0x000000000036cfaf <+751>: vmovdqu ymm3,YMMWORD PTR [rsi+0x60]
   0x000000000036cfb4 <+756>: add    rsi,0x80
   0x000000000036cfbb <+763>: sub    rdx,0x80
   0x000000000036cfc2 <+770>: vmovntdq YMMWORD PTR [rdi],ymm0
   0x000000000036cfc6 <+774>: vmovntdq YMMWORD PTR [rdi+0x20],ymm1
   0x000000000036cfcb <+779>: vmovntdq YMMWORD PTR [rdi+0x40],ymm2
   0x000000000036cfd0 <+784>: vmovntdq YMMWORD PTR [rdi+0x60],ymm3
   0x000000000036cfd5 <+789>: add    rdi,0x80
   0x000000000036cfdc <+796>: cmp    rdx,0x80
   0x000000000036cfe3 <+803>: ja     0x36cf85 <__memmove_avx_unaligned_erms+709>
   0x000000000036cfe5 <+805>: sfence
   0x000000000036cfe8 <+808>: vmovdqu YMMWORD PTR [rcx],ymm5
   0x000000000036cfec <+812>: vmovdqu YMMWORD PTR [rcx-0x20],ymm6
   0x000000000036cff1 <+817>: vmovdqu YMMWORD PTR [rcx-0x40],ymm7
   0x000000000036cff6 <+822>: vmovdqu YMMWORD PTR [rcx-0x60],ymm8
   0x000000000036cffb <+827>: vmovdqu YMMWORD PTR [r11],ymm4
   0x000000000036d000 <+832>: vzeroupper
   0x000000000036d003 <+835>: ret
   0x000000000036d004 <+836>: lea    r10,[rcx+rdx*1]
   0x000000000036d008 <+840>: cmp    r9,r10
   0x000000000036d00b <+843>: jb     0x36cf14 <__memmove_avx_unaligned_erms+596>
   0x000000000036d011 <+849>: prefetcht0 BYTE PTR [rcx-0x100]
   0x000000000036d018 <+856>: prefetcht0 BYTE PTR [rcx-0x140]
   0x000000000036d01f <+863>: prefetcht0 BYTE PTR [rcx-0x180]
   0x000000000036d026 <+870>: prefetcht0 BYTE PTR [rcx-0x1c0]
   0x000000000036d02d <+877>: vmovdqu ymm0,YMMWORD PTR [rcx]
   0x000000000036d031 <+881>: vmovdqu ymm1,YMMWORD PTR [rcx-0x20]
   0x000000000036d036 <+886>: vmovdqu ymm2,YMMWORD PTR [rcx-0x40]
   0x000000000036d03b <+891>: vmovdqu ymm3,YMMWORD PTR [rcx-0x60]
   0x000000000036d040 <+896>: sub    rcx,0x80
   0x000000000036d047 <+903>: sub    rdx,0x80
   0x000000000036d04e <+910>: vmovntdq YMMWORD PTR [r9],ymm0
   0x000000000036d053 <+915>: vmovntdq YMMWORD PTR [r9-0x20],ymm1
   0x000000000036d059 <+921>: vmovntdq YMMWORD PTR [r9-0x40],ymm2
   0x000000000036d05f <+927>: vmovntdq YMMWORD PTR [r9-0x60],ymm3
   0x000000000036d065 <+933>: sub    r9,0x80
   0x000000000036d06c <+940>: cmp    rdx,0x80
   0x000000000036d073 <+947>: ja     0x36d011 <__memmove_avx_unaligned_erms+849>
   0x000000000036d075 <+949>: sfence
   0x000000000036d078 <+952>: vmovdqu YMMWORD PTR [rdi],ymm4
   0x000000000036d07c <+956>: vmovdqu YMMWORD PTR [rdi+0x20],ymm5
   0x000000000036d081 <+961>: vmovdqu YMMWORD PTR [rdi+0x40],ymm6
   0x000000000036d086 <+966>: vmovdqu YMMWORD PTR [rdi+0x60],ymm7
   0x000000000036d08b <+971>: vmovdqu YMMWORD PTR [r11],ymm8
   0x000000000036d090 <+976>: vzeroupper
   0x000000000036d093 <+979>: ret
End of assembler dump.
(gdb)



More information about the cfe-dev mailing list