[PATCH] D19904: XRay: Add entry and exit sleds

Mon May 9 21:49:20 PDT 2016

dberris added inline comments.

================
Comment at: lib/Target/X86/X86MCInstLower.cpp:1082
@@ +1081,3 @@
+  //   jmp .tmpN
+  //   # 9 bytes worth of noops
+  // .tmpN
----------------
dberris wrote:
> bmakam wrote:
> > Could you please expand on why you need 9 bytes of noops here? I am not quite familiar with x86_64 but was under the impression that on x86_64 the jmp instruction is 1 byte for opcode and 4 bytes for signed relative displacement, so shouldn't 5 bytes worth of nops be sufficient?
> Good question, thanks.
> 
> I have to check whether we're using the right version of JMP, but I'm specifically looking for the version that's one byte for the JMP instruction, and 8 bits (1 byte) for the relative offset. So far I haven't been able to spell `jmp +0x09` correctly and have it work, without having an additional symbol as a target for the jump instruction. If we get that right, we can then add the 9 byte nops we need to get exactly 11 bytes for the function entry.
> 
> Is there a fool-proof way of spelling "JMP +0x09" with the builder interface? Or should I add another JMP instruction in X86 that supports the 8-bit displacement immediate operand?
> 
> The reason why I can't use a JMP that isn't 2 bytes, is because it's really hard to write just 5 bytes atomically. I can probably do something with an 8-byte atomic write, but enforcing that 8-byte write doesn't span cache lines is also very tricky to make safe.
Actually now that I've had a look at the generated object file, I can confirm that we're using the two-byte version of `JMP` with this construct. Quick test:

test.cc:
```
#include <cstdio>

[[clang::xray_always_instrument]] void foo() { std::printf("Hello, XRay!\n"); }

int main(int argc, char* argv[]) { foo(); }
```

Compiled with (modified clang to emit IR that has annotated functions for XRay):

./bin/clang -fxray-instrument -fxray-instruction-threshold=1 -std=c++11 -x c++ -S test.cc -emit-llvm

Creates this IR listing:

```
; ModuleID = 'test.cc'
source_filename = "test.cc"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [14 x i8] c"Hello, XRay!\0A\00", align 1

; Function Attrs: uwtable
define void @_Z3foov() #0 {
entry:
  %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str, i32 0, i32 0))
  ret void
}

declare i32 @printf(i8*, ...) #1

; Function Attrs: norecurse uwtable
define i32 @main(i32 %argc, i8** %argv) #2 {
entry:
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  call void @_Z3foov()
  ret i32 0
}

attributes #0 = { uwtable "disable-tail-calls"="false" "function-instrument"="xray-always" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "xray-instruction-threshold"="1" }

!llvm.ident = !{!0}

!0 = !{!"clang version 3.9.0 (http://llvm.org/git/clang.git 2b9ed9227330789f24e56fe9e800e7be2111073b) (http://llvm.org/git/llvm.git 1f3e353113ff9e2f835955bf005a8a5e25f16ad1)"}
```

Then compiled+disassembled this way:

./bin/llc -filetype=obj -o - < test.ll | ./bin/llvm-objdump -disassemble -

Produces the following output:

```
<stdin>:        file format ELF64-x86-64

Disassembly of section .text:
_Z3foov:
       0:       eb 09   jmp     9 <_Z3foov+0xB>
       2:       66 0f 1f 84 00 00 02 00 00      nopw    512(%rax,%rax)
       b:       55      pushq   %rbp
       c:       48 89 e5        movq    %rsp, %rbp
       f:       bf 00 00 00 00  movl    $0, %edi
      14:       31 c0   xorl    %eax, %eax
      16:       e8 00 00 00 00  callq   0 <_Z3foov+0x1B>
      1b:       5d      popq    %rbp
      1c:       c3      retq
      1d:       2e 66 0f 1f 84 00 00 02 00 00   nopw    %cs:512(%rax,%rax)
      27:       66 0f 1f 84 00 00 00 00 00      nopw    (%rax,%rax)

main:
      30:       eb 09   jmp     9 <main+0xB>
      32:       66 0f 1f 84 00 00 02 00 00      nopw    512(%rax,%rax)
      3b:       55      pushq   %rbp
      3c:       48 89 e5        movq    %rsp, %rbp
      3f:       48 83 ec 10     subq    $16, %rsp
      43:       89 7d fc        movl    %edi, -4(%rbp)
      46:       48 89 75 f0     movq    %rsi, -16(%rbp)
      4a:       e8 b1 ff ff ff  callq   -79 <_Z3foov>
      4f:       31 c0   xorl    %eax, %eax
      51:       48 83 c4 10     addq    $16, %rsp
      55:       5d      popq    %rbp
      56:       c3      retq
      57:       2e 66 0f 1f 84 00 00 02 00 00   nopw    %cs:512(%rax,%rax)
```

http://reviews.llvm.org/D19904