[llvm] [XRay][X86] Handle conditional calls when lowering patchable tail calls (PR #89364)

Ricky Zhou via llvm-commits llvm-commits at lists.llvm.org
Mon May 27 20:32:52 PDT 2024


https://github.com/rickyz updated https://github.com/llvm/llvm-project/pull/89364

>From 1bbcedb5c9c9b4992b67d3e70554585163870866 Mon Sep 17 00:00:00 2001
From: Ricky Zhou <ricky at rzhou.org>
Date: Fri, 19 Apr 2024 02:44:07 -0700
Subject: [PATCH 1/2] [xray] Handle conditional calls when lowering patchable
 tail calls.

xray instruments tail call function exits by inserting a nop sled before
the tail call. When tracing is enabled, the nop sled is replaced with a
call to `__xray_FunctionTailExit()`. This currently does not work for
conditional tail calls, as the instrumentation assumes that the tail
call will be unconditional. This causes two issues:
 - `__xray_FunctionTailExit()` is inappropriately called even when the
   tail call is not taken.
 - `__xray_FunctionTailExit()`'s prologue/epilogue adjusts the stack
   pointer with add/sub instructions. This clobbers condition flags,
   which can flip the condition used for the tail call, leading to
   incorrect program behavior.

Fix this by rewriting conditional calls when lowering patchable tail
calls.

With this change, a conditional patchable tail call like:
```
  je target
```

Will be lowered to:
```
  jne .fallthrough
  .p2align 1, ..
.Lxray_sled_N:
  SLED_CODE
  jmp target
.fallthrough:
```
---
 llvm/lib/Target/X86/X86MCInstLower.cpp        | 40 ++++++++++++++++---
 .../CodeGen/X86/xray-conditional-tail-call.ll | 35 ++++++++++++++++
 2 files changed, 69 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/xray-conditional-tail-call.ll

diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e6510be6b9afd..96d6a6f42035d 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -22,6 +22,7 @@
 #include "X86RegisterInfo.h"
 #include "X86ShuffleDecodeConstantPool.h"
 #include "X86Subtarget.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -1361,6 +1362,35 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
 
 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
                                              X86MCInstLower &MCIL) {
+  MCInst TC;
+  TC.setOpcode(convertTailJumpOpcode(MI.getOperand(0).getImm()));
+  // Drop the tail jump opcode.
+  auto TCOperands = drop_begin(MI.operands());
+  bool IsConditional = TC.getOpcode() == X86::JCC_1;
+  MCSymbol *FallthroughLabel;
+  if (IsConditional) {
+    // Rewrite:
+    //   je target
+    //
+    // To:
+    //   jne .fallthrough
+    //   .p2align 1, ...
+    // .Lxray_sled_N:
+    //   SLED_CODE
+    //   jmp target
+    // .fallthrough:
+    FallthroughLabel = OutContext.createTempSymbol();
+    EmitToStreamer(
+        *OutStreamer,
+        MCInstBuilder(X86::JCC_1)
+            .addExpr(MCSymbolRefExpr::create(FallthroughLabel, OutContext))
+            .addImm(X86::GetOppositeBranchCondition(
+                static_cast<X86::CondCode>(MI.getOperand(2).getImm()))));
+    TC.setOpcode(X86::JMP_1);
+    // Drop the condition code.
+    TCOperands = drop_end(TCOperands);
+  }
+
   NoAutoPaddingScope NoPadScope(*OutStreamer);
 
   // Like PATCHABLE_RET, we have the actual instruction in the operands to this
@@ -1382,18 +1412,16 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
   OutStreamer->emitLabel(Target);
   recordSled(CurSled, MI, SledKind::TAIL_CALL, 2);
 
-  unsigned OpCode = MI.getOperand(0).getImm();
-  OpCode = convertTailJumpOpcode(OpCode);
-  MCInst TC;
-  TC.setOpcode(OpCode);
-
   // Before emitting the instruction, add a comment to indicate that this is
   // indeed a tail call.
   OutStreamer->AddComment("TAILCALL");
-  for (auto &MO : drop_begin(MI.operands()))
+  for (auto &MO : TCOperands)
     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
       TC.addOperand(*MaybeOperand);
   OutStreamer->emitInstruction(TC, getSubtargetInfo());
+
+  if (IsConditional)
+    OutStreamer->emitLabel(FallthroughLabel);
 }
 
 // Returns instruction preceding MBBI in MachineFunction.
diff --git a/llvm/test/CodeGen/X86/xray-conditional-tail-call.ll b/llvm/test/CodeGen/X86/xray-conditional-tail-call.ll
new file mode 100644
index 0000000000000..9ceea75ca769e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/xray-conditional-tail-call.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
+
+declare void @tail_call_target()
+
+define void @conditional_tail_call(i32 %cond) "function-instrument"="xray-always" nounwind {
+  ; CHECK-LABEL: conditional_tail_call:
+  ; CHECK-NEXT:  .Lfunc_begin0:
+  ; CHECK-NEXT:  # %bb.0:
+  ; CHECK-NEXT:    .p2align 1, 0x90
+  ; CHECK-NEXT:  .Lxray_sled_0:
+  ; CHECK-NEXT:    .ascii "\353\t"
+  ; CHECK-NEXT:    nopw 512(%rax,%rax)
+  ; CHECK-NEXT:    testl %edi, %edi
+  ; CHECK-NEXT:    je .Ltmp0
+  ; CHECK-NEXT:    .p2align 1, 0x90
+  ; CHECK-NEXT:  .Lxray_sled_1:
+  ; CHECK-NEXT:    .ascii "\353\t"
+  ; CHECK-NEXT:    nopw  512(%rax,%rax)
+  ; CHECK-NEXT:  .Ltmp1:
+  ; CHECK-NEXT:    jmp tail_call_target@PLT # TAILCALL
+  ; CHECK-NEXT:  .Ltmp0:
+  ; CHECK-NEXT:  # %bb.1:
+  ; CHECK-NEXT:   .p2align  1, 0x90
+  ; CHECK-NEXT:  .Lxray_sled_2:
+  ; CHECK-NEXT:    retq
+  ; CHECK-NEXT:    nopw %cs:512(%rax,%rax)
+  ; CHECK-NEXT:  .Lfunc_end0:
+  %cmp = icmp ne i32 %cond, 0
+  br i1 %cmp, label %docall, label %ret
+docall:
+  tail call void @tail_call_target()
+  ret void
+ret:
+  ret void
+}

>From 6aa0eae5add9e45a99c2c7d9be1089706ab969b5 Mon Sep 17 00:00:00 2001
From: Ricky Zhou <ricky at rzhou.org>
Date: Mon, 27 May 2024 20:31:42 -0700
Subject: [PATCH 2/2] fixup! [xray] Handle conditional calls when lowering
 patchable tail calls.

Merge xray-conditional-tail-call.ll into xray-tail-call-sled.ll.
---
 .../CodeGen/X86/xray-conditional-tail-call.ll | 35 -------------
 llvm/test/CodeGen/X86/xray-tail-call-sled.ll  | 51 +++++++++++++++++++
 2 files changed, 51 insertions(+), 35 deletions(-)
 delete mode 100644 llvm/test/CodeGen/X86/xray-conditional-tail-call.ll

diff --git a/llvm/test/CodeGen/X86/xray-conditional-tail-call.ll b/llvm/test/CodeGen/X86/xray-conditional-tail-call.ll
deleted file mode 100644
index 9ceea75ca769e..0000000000000
--- a/llvm/test/CodeGen/X86/xray-conditional-tail-call.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
-
-declare void @tail_call_target()
-
-define void @conditional_tail_call(i32 %cond) "function-instrument"="xray-always" nounwind {
-  ; CHECK-LABEL: conditional_tail_call:
-  ; CHECK-NEXT:  .Lfunc_begin0:
-  ; CHECK-NEXT:  # %bb.0:
-  ; CHECK-NEXT:    .p2align 1, 0x90
-  ; CHECK-NEXT:  .Lxray_sled_0:
-  ; CHECK-NEXT:    .ascii "\353\t"
-  ; CHECK-NEXT:    nopw 512(%rax,%rax)
-  ; CHECK-NEXT:    testl %edi, %edi
-  ; CHECK-NEXT:    je .Ltmp0
-  ; CHECK-NEXT:    .p2align 1, 0x90
-  ; CHECK-NEXT:  .Lxray_sled_1:
-  ; CHECK-NEXT:    .ascii "\353\t"
-  ; CHECK-NEXT:    nopw  512(%rax,%rax)
-  ; CHECK-NEXT:  .Ltmp1:
-  ; CHECK-NEXT:    jmp tail_call_target@PLT # TAILCALL
-  ; CHECK-NEXT:  .Ltmp0:
-  ; CHECK-NEXT:  # %bb.1:
-  ; CHECK-NEXT:   .p2align  1, 0x90
-  ; CHECK-NEXT:  .Lxray_sled_2:
-  ; CHECK-NEXT:    retq
-  ; CHECK-NEXT:    nopw %cs:512(%rax,%rax)
-  ; CHECK-NEXT:  .Lfunc_end0:
-  %cmp = icmp ne i32 %cond, 0
-  br i1 %cmp, label %docall, label %ret
-docall:
-  tail call void @tail_call_target()
-  ret void
-ret:
-  ret void
-}
diff --git a/llvm/test/CodeGen/X86/xray-tail-call-sled.ll b/llvm/test/CodeGen/X86/xray-tail-call-sled.ll
index 4d0c359f0dc31..126e5db52a5b7 100644
--- a/llvm/test/CodeGen/X86/xray-tail-call-sled.ll
+++ b/llvm/test/CodeGen/X86/xray-tail-call-sled.ll
@@ -66,3 +66,54 @@ define dso_local i32 @caller() nounwind noinline uwtable "function-instrument"="
 ; CHECK-MACOS:       [[IDX:lxray_fn_idx[0-9]+]]:
 ; CHECK-MACOS-NEXT:    .quad lxray_sleds_start1-[[IDX]]
 ; CHECK-MACOS-NEXT:    .quad 2
+
+define dso_local i32 @conditional_tail_call(i32 %cond) nounwind noinline uwtable "function-instrument"="xray-always" {
+; CHECK-LABEL: conditional_tail_call:
+; CHECK:         .p2align 1, 0x90
+; CHECK-LABEL: Lxray_sled_4:
+; CHECK:         .ascii "\353\t"
+; CHECK-NEXT:    nopw 512(%rax,%rax)
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    je {{\.?Ltmp5}}
+; CHECK:         .p2align 1, 0x90
+; CHECK-LABEL: Lxray_sled_5:
+; CHECK-NEXT:    .ascii "\353\t"
+; CHECK-NEXT:    nopw 512(%rax,%rax)
+; CHECK-LABEL: Ltmp6:
+; CHECK-NEXT:    jmp {{.*}}callee {{.*}}# TAILCALL
+; CHECK-LABEL: Ltmp5:
+; CHECK:         xorl %eax, %eax
+; CHECK-NEXT:   .p2align  1, 0x90
+; CHECK-LABEL: Lxray_sled_6:
+; CHECK-NEXT:    retq
+; CHECK-NEXT:    nopw %cs:512(%rax,%rax)
+  %cmp = icmp ne i32 %cond, 0
+  br i1 %cmp, label %docall, label %ret
+docall:
+  %retval = tail call i32 @callee()
+  ret i32 %retval
+ret:
+  ret i32 0
+}
+
+; CHECK-LINUX-LABEL: .section xray_instr_map,"ao",@progbits,conditional_tail_call{{$}}
+; CHECK-LINUX-LABEL: .Lxray_sleds_start2:
+; CHECK-LINUX:         .quad .Lxray_sled_4
+; CHECK-LINUX:         .quad .Lxray_sled_5
+; CHECK-LINUX:         .quad .Lxray_sled_6
+; CHECK-LINUX-LABEL: .Lxray_sleds_end2:
+; CHECK-LINUX-LABEL: .section xray_fn_idx,"ao",@progbits,conditional_tail_call{{$}}
+; CHECK-LINUX:       [[IDX:\.Lxray_fn_idx[0-9]+]]:
+; CHECK-LINUX-NEXT:    .quad .Lxray_sleds_start2-[[IDX]]
+; CHECK-LINUX-NEXT:    .quad 3
+
+; CHECK-MACOS-LABEL: .section __DATA,xray_instr_map,regular,live_support{{$}}
+; CHECK-MACOS-LABEL: lxray_sleds_start2:
+; CHECK-MACOS:         .quad Lxray_sled_4
+; CHECK-MACOS:         .quad Lxray_sled_5
+; CHECK-MACOS:         .quad Lxray_sled_6
+; CHECK-MACOS-LABEL: Lxray_sleds_end2:
+; CHECK-MACOS-LABEL: .section __DATA,xray_fn_idx,regular,live_support{{$}}
+; CHECK-MACOS:       [[IDX:lxray_fn_idx[0-9]+]]:
+; CHECK-MACOS-NEXT:    .quad lxray_sleds_start2-[[IDX]]
+; CHECK-MACOS-NEXT:    .quad 3



More information about the llvm-commits mailing list