[llvm] [CodeGen][X86] Fix lowering of tailcalls when `-ms-hotpatch` is used (PR #77245)

Alexandre Ganea via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 22 07:33:23 PST 2024


https://github.com/aganea updated https://github.com/llvm/llvm-project/pull/77245

>From 06d10b8d1a0f4bbd592325e1a0f284fd61100b91 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alex_toresh at yahoo.fr>
Date: Sun, 7 Jan 2024 13:42:59 -0500
Subject: [PATCH 1/4] [CodeGen][X86] Fix lowering of tailcalls when
 `-ms-hotpatch` is used

Previously, tail jump pseudo-opcodes were skipped by the `encodeInstruction()` call inside `X86AsmPrinter::LowerPATCHABLE_OP`. This caused emission of a 2-byte NOP and dropping of the tail jump

Also, the `PatchableFunction` pass didn't properly update the call site info for the existing MachineInstr, after wrapping it into a `TargetOpcode::PATCHABLE_OP`.
---
 llvm/lib/CodeGen/PatchableFunction.cpp        |  3 +
 llvm/lib/Target/X86/X86MCInstLower.cpp        |  8 ++-
 .../X86/patchable-prologue-tailcall.ll        | 69 +++++++++++++++++++
 3 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll

diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp
index 9449f143366f0f..768e8dff2efc24 100644
--- a/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -87,6 +87,9 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
   for (auto &MO : FirstActualI->operands())
     MIB.add(MO);
 
+  if (FirstActualI->shouldUpdateCallSiteInfo())
+    MF.eraseCallSiteInfo(&*FirstActualI);
+
   FirstActualI->eraseFromParent();
   MF.ensureAlignment(Align(16));
   return true;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e1a67f61e76640..93aa0fc23eb9f2 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -959,7 +959,8 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
   bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);
 
   MCInst MCI;
-  MCI.setOpcode(Opcode);
+  // Make sure below we don't encode pseudo tailcalls.
+  MCI.setOpcode(convertTailJumpOpcode(Opcode));
   for (auto &MO : drop_begin(MI.operands(), 2))
     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
       MCI.addOperand(*MaybeOperand);
@@ -994,8 +995,11 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
       (void)NopSize;
     }
   }
-  if (!EmptyInst)
+  if (!EmptyInst) {
+    if (Opcode != convertTailJumpOpcode(Opcode))
+      OutStreamer->AddComment("TAILCALL");
     OutStreamer->emitInstruction(MCI, getSubtargetInfo());
+  }
 }
 
 // Lower a stackmap of the form:
diff --git a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
new file mode 100644
index 00000000000000..ab3ff5852a74b0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
@@ -0,0 +1,69 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+
+; CHECK: "?mi_new_test@@YAPEAX_K at Z":
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: jmp     mi_new
+
+; CHECK: "?builtin_malloc_test@@YAPEAX_K at Z":
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: jmp     malloc
+
+; // Built with: clang-cl.exe /c patchable-prologue-tailcall.cpp /O2 /hotpatch -Xclang -emit-llvm
+;
+; typedef unsigned long long size_t;
+; 
+; extern "C" {
+;     void* mi_new(size_t size);
+;  }
+;
+; void *mi_new_test(size_t count)
+; {
+;     return mi_new(count);
+; }
+; 
+; void *builtin_malloc_test(size_t count)
+; {
+;     return __builtin_malloc(count);
+; }
+
+; ModuleID = 'patchable-prologue-tailcall.cpp'
+source_filename = "patchable-prologue-tailcall.cpp"
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.38.33133"
+
+; Function Attrs: mustprogress nounwind sspstrong uwtable
+define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr #0 {
+entry:
+  %call = tail call ptr @mi_new(i64 noundef %count) #4
+  ret ptr %call
+}
+
+declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable
+define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr #2 {
+entry:
+  %call = tail call ptr @malloc(i64 noundef %count) #4
+  ret ptr %call
+}
+
+; Function Attrs: mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite)
+declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #3
+
+attributes #0 = { mustprogress nounwind sspstrong uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind }
+
+!llvm.linker.options = !{!0, !1}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = !{!"/DEFAULTLIB:libcmt.lib"}
+!1 = !{!"/DEFAULTLIB:oldnames.lib"}
+!2 = !{i32 1, !"wchar_size", i32 2}
+!3 = !{i32 8, !"PIC Level", i32 2}
+!4 = !{i32 7, !"uwtable", i32 2}
+!5 = !{i32 1, !"MaxTLSAlign", i32 65536}
+!6 = !{!"clang version 18.0.0git (https://github.com/llvm/llvm-project.git 0ccf9e29e9626a3a6813b35608c8959178c50a6f)"}

>From 7ef9c22d2f5c62070be981675b74d9d379aa7ce2 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alex_toresh at yahoo.fr>
Date: Wed, 10 Jan 2024 08:42:45 -0500
Subject: [PATCH 2/4] Change how
 `"patchable-function"="prologue-short-redirect"`works.

Instead of `PATCHABLE_OP` wrapping the first `MachineInstr`, it now inserts itself before it, leaving the instruction unaltered. At lowering time in `X86AsmPrinter`, we now "look ahead" for the next non-pseudo `MachineInstr` and lower+encode it, to inspect its size. If the size is below what `PATCHABLE_OP` expects, it inserts NOPs; otherwise it does nothing
---
 llvm/include/llvm/Support/TargetOpcodes.def   | 11 +---
 llvm/lib/CodeGen/PatchableFunction.cpp        | 55 ++++---------------
 llvm/lib/Target/X86/X86MCInstLower.cpp        | 41 +++++---------
 .../X86/patchable-prologue-tailcall.ll        | 42 +++++---------
 llvm/test/CodeGen/X86/patchable-prologue.ll   |  4 +-
 5 files changed, 46 insertions(+), 107 deletions(-)

diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 3824b1c66951e8..669cac2a9f7e65 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -170,15 +170,10 @@ HANDLE_TARGET_OPCODE(LOCAL_ESCAPE)
 /// comparisons into existing memory operations.
 HANDLE_TARGET_OPCODE(FAULTING_OP)
 
-/// Wraps a machine instruction to add patchability constraints.  An
-/// instruction wrapped in PATCHABLE_OP has to either have a minimum
+/// Precedes a machine instruction to add patchability constraints.  An
+/// instruction after PATCHABLE_OP has to either have a minimum
 /// size or be preceded with a nop of that size.  The first operand is
-/// an immediate denoting the minimum size of the instruction, the
-/// second operand is an immediate denoting the opcode of the original
-/// instruction.  The rest of the operands are the operands of the
-/// original instruction.
-/// PATCHABLE_OP can be used as second operand to only insert a nop of
-/// required size.
+/// an immediate denoting the minimum size of the instruction.
 HANDLE_TARGET_OPCODE(PATCHABLE_OP)
 
 /// This is a marker instruction which gets translated into a nop sled, useful
diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp
index 768e8dff2efc24..75c2dfeca58d5a 100644
--- a/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -38,61 +38,28 @@ struct PatchableFunction : public MachineFunctionPass {
 }
 
 bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
+  MachineBasicBlock &FirstMBB = *MF.begin();
+
   if (MF.getFunction().hasFnAttribute("patchable-function-entry")) {
-    MachineBasicBlock &FirstMBB = *MF.begin();
     const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
     // The initial .loc covers PATCHABLE_FUNCTION_ENTER.
     BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
             TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
     return true;
-  }
-
-  if (!MF.getFunction().hasFnAttribute("patchable-function"))
-    return false;
-
+  } else if (MF.getFunction().hasFnAttribute("patchable-function")) {
 #ifndef NDEBUG
-  Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function");
-  StringRef PatchType = PatchAttr.getValueAsString();
-  assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
+    Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function");
+    StringRef PatchType = PatchAttr.getValueAsString();
+    assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
 #endif
-
-  auto &FirstMBB = *MF.begin();
-  auto *TII = MF.getSubtarget().getInstrInfo();
-
-  MachineBasicBlock::iterator FirstActualI = llvm::find_if(
-      FirstMBB, [](const MachineInstr &MI) { return !MI.isMetaInstruction(); });
-
-  if (FirstActualI == FirstMBB.end()) {
-    // As of Microsoft documentation on /hotpatch feature, we must ensure that
-    // "the first instruction of each function is at least two bytes, and no
-    // jump within the function goes to the first instruction"
-
-    // When the first MBB is empty, insert a patchable no-op. This ensures the
-    // first instruction is patchable in two special cases:
-    // - the function is empty (e.g. unreachable)
-    // - the function jumps back to the first instruction, which is in a
-    // successor MBB.
-    BuildMI(&FirstMBB, DebugLoc(), TII->get(TargetOpcode::PATCHABLE_OP))
-        .addImm(2)
-        .addImm(TargetOpcode::PATCHABLE_OP);
+    auto *TII = MF.getSubtarget().getInstrInfo();
+    BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
+            TII->get(TargetOpcode::PATCHABLE_OP))
+        .addImm(2);
     MF.ensureAlignment(Align(16));
     return true;
   }
-
-  auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(),
-                     TII->get(TargetOpcode::PATCHABLE_OP))
-                 .addImm(2)
-                 .addImm(FirstActualI->getOpcode());
-
-  for (auto &MO : FirstActualI->operands())
-    MIB.add(MO);
-
-  if (FirstActualI->shouldUpdateCallSiteInfo())
-    MF.eraseCallSiteInfo(&*FirstActualI);
-
-  FirstActualI->eraseFromParent();
-  MF.ensureAlignment(Align(16));
-  return true;
+  return false;
 }
 
 char PatchableFunction::ID = 0;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 93aa0fc23eb9f2..1122d31593c4ef 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -948,25 +948,26 @@ void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
 
 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
-  // PATCHABLE_OP minsize, opcode, operands
+  // PATCHABLE_OP minsize
 
   NoAutoPaddingScope NoPadScope(*OutStreamer);
 
-  unsigned MinSize = MI.getOperand(0).getImm();
-  unsigned Opcode = MI.getOperand(1).getImm();
-  // Opcode PATCHABLE_OP is a special case: there is no instruction to wrap,
-  // simply emit a nop of size MinSize.
-  bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);
-
-  MCInst MCI;
-  // Make sure below we don't encode pseudo tailcalls.
-  MCI.setOpcode(convertTailJumpOpcode(Opcode));
-  for (auto &MO : drop_begin(MI.operands(), 2))
-    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
-      MCI.addOperand(*MaybeOperand);
+  // Find the next MachineInstr in this MBB.
+  const MachineInstr *NextMI = MI.getNextNode();
+  while (NextMI) {
+    if (!NextMI->isMetaInstruction())
+      break;
+    NextMI = NextMI->getNextNode();
+  }
 
   SmallString<256> Code;
-  if (!EmptyInst) {
+  unsigned MinSize = MI.getOperand(0).getImm();
+
+  if (NextMI) {
+    // Lower the next MachineInstr to find its byte size.
+    MCInst MCI;
+    MCIL.Lower(NextMI, MCI);
+
     SmallVector<MCFixup, 4> Fixups;
     CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
   }
@@ -982,24 +983,12 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
       OutStreamer->emitInstruction(
           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
           *Subtarget);
-    } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
-      // This is an optimization that lets us get away without emitting a nop in
-      // many cases.
-      //
-      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
-      // bytes too, so the check on MinSize is important.
-      MCI.setOpcode(X86::PUSH64rmr);
     } else {
       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
       assert(NopSize == MinSize && "Could not implement MinSize!");
       (void)NopSize;
     }
   }
-  if (!EmptyInst) {
-    if (Opcode != convertTailJumpOpcode(Opcode))
-      OutStreamer->AddComment("TAILCALL");
-    OutStreamer->emitInstruction(MCI, getSubtargetInfo());
-  }
 }
 
 // Lower a stackmap of the form:
diff --git a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
index ab3ff5852a74b0..38a1ce92ff5588 100644
--- a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
+++ b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
@@ -2,11 +2,11 @@
 
 ; CHECK: "?mi_new_test@@YAPEAX_K at Z":
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: jmp     mi_new
+; CHECK-NEXT: jmp     mi_new                          # TAILCALL
 
 ; CHECK: "?builtin_malloc_test@@YAPEAX_K at Z":
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: jmp     malloc
+; CHECK-NEXT: jmp     malloc                          # TAILCALL
 
 ; // Built with: clang-cl.exe /c patchable-prologue-tailcall.cpp /O2 /hotpatch -Xclang -emit-llvm
 ;
@@ -26,44 +26,30 @@
 ;     return __builtin_malloc(count);
 ; }
 
-; ModuleID = 'patchable-prologue-tailcall.cpp'
-source_filename = "patchable-prologue-tailcall.cpp"
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc19.38.33133"
 
-; Function Attrs: mustprogress nounwind sspstrong uwtable
-define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr #0 {
+define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @mi_new(i64 noundef %count) #4
+  %call = tail call ptr @mi_new(i64 noundef %count)
   ret ptr %call
 }
 
-declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr #1
+declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr
 
-; Function Attrs: mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable
-define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr #2 {
+define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @malloc(i64 noundef %count) #4
+  %call = tail call ptr @malloc(i64 noundef %count)
   ret ptr %call
 }
 
-; Function Attrs: mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite)
-declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #3
+declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #0
 
-attributes #0 = { mustprogress nounwind sspstrong uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #3 = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #4 = { nounwind }
+attributes #0 = { allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
 
-!llvm.linker.options = !{!0, !1}
-!llvm.module.flags = !{!2, !3, !4, !5}
-!llvm.ident = !{!6}
+!llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = !{!"/DEFAULTLIB:libcmt.lib"}
-!1 = !{!"/DEFAULTLIB:oldnames.lib"}
-!2 = !{i32 1, !"wchar_size", i32 2}
-!3 = !{i32 8, !"PIC Level", i32 2}
-!4 = !{i32 7, !"uwtable", i32 2}
-!5 = !{i32 1, !"MaxTLSAlign", i32 65536}
-!6 = !{!"clang version 18.0.0git (https://github.com/llvm/llvm-project.git 0ccf9e29e9626a3a6813b35608c8959178c50a6f)"}
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 2}
+!3 = !{i32 1, !"MaxTLSAlign", i32 65536}
diff --git a/llvm/test/CodeGen/X86/patchable-prologue.ll b/llvm/test/CodeGen/X86/patchable-prologue.ll
index f1a39777a40bcc..71a392845fdea3 100644
--- a/llvm/test/CodeGen/X86/patchable-prologue.ll
+++ b/llvm/test/CodeGen/X86/patchable-prologue.ll
@@ -32,7 +32,8 @@ define void @f0() "patchable-function"="prologue-short-redirect" {
 
 define void @f1() "patchable-function"="prologue-short-redirect" "frame-pointer"="all" {
 ; CHECK-LABEL: _f1
-; CHECK-NEXT: ff f5 	pushq	%rbp
+; CHECK-NEXT: 66 90     nop
+; CHECK-NEXT: 55		pushq	%rbp
 
 ; CHECK-ALIGN: 	.p2align	4, 0x90
 ; CHECK-ALIGN: _f1:
@@ -47,6 +48,7 @@ define void @f1() "patchable-function"="prologue-short-redirect" "frame-pointer"
 ; 64: f1:
 ; 64-NEXT: .seh_proc f1
 ; 64-NEXT: # %bb.0:
+; 64-NEXT: xchgw %ax, %ax
 ; 64-NEXT: pushq   %rbp
 		
   ret void

>From f83ecee291542bdc4008b26fc323f0ebe32a6742 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alex_toresh at yahoo.fr>
Date: Wed, 10 Jan 2024 08:47:33 -0500
Subject: [PATCH 3/4] Documentation.

---
 llvm/include/llvm/Support/TargetOpcodes.def | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 669cac2a9f7e65..a3c38a56561c1a 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -173,7 +173,7 @@ HANDLE_TARGET_OPCODE(FAULTING_OP)
 /// Precedes a machine instruction to add patchability constraints.  An
 /// instruction after PATCHABLE_OP has to either have a minimum
 /// size or be preceded with a nop of that size.  The first operand is
-/// an immediate denoting the minimum size of the instruction.
+/// an immediate denoting the minimum size of the following instruction.
 HANDLE_TARGET_OPCODE(PATCHABLE_OP)
 
 /// This is a marker instruction which gets translated into a nop sled, useful

>From 342c4d4bd0ab6eb32dd1e2fe39eb10eefbc0a474 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alex_toresh at yahoo.fr>
Date: Mon, 22 Jan 2024 10:33:08 -0500
Subject: [PATCH 4/4] Simplify test and change code as suggested

---
 llvm/lib/Target/X86/X86MCInstLower.cpp        | 14 +++----
 .../X86/patchable-prologue-tailcall.ll        | 41 +++++--------------
 2 files changed, 15 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 1122d31593c4ef..e4d1d13cc31597 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -952,21 +952,17 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
 
   NoAutoPaddingScope NoPadScope(*OutStreamer);
 
-  // Find the next MachineInstr in this MBB.
-  const MachineInstr *NextMI = MI.getNextNode();
-  while (NextMI) {
-    if (!NextMI->isMetaInstruction())
-      break;
-    NextMI = NextMI->getNextNode();
-  }
+  auto NextMI = std::find_if(std::next(MI.getIterator()),
+                             MI.getParent()->end().getInstrIterator(),
+                             [](auto &II) { return !II.isMetaInstruction(); });
 
   SmallString<256> Code;
   unsigned MinSize = MI.getOperand(0).getImm();
 
-  if (NextMI) {
+  if (NextMI != MI.getParent()->end()) {
     // Lower the next MachineInstr to find its byte size.
     MCInst MCI;
-    MCIL.Lower(NextMI, MCI);
+    MCIL.Lower(&*NextMI, MCI);
 
     SmallVector<MCFixup, 4> Fixups;
     CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
diff --git a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
index 38a1ce92ff5588..5e55524fecc9a1 100644
--- a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
+++ b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
@@ -1,49 +1,28 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
 
-; CHECK: "?mi_new_test@@YAPEAX_K at Z":
+; CHECK: f1:
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: jmp     mi_new                          # TAILCALL
+; CHECK-NEXT: jmp     f0                          # TAILCALL
 
-; CHECK: "?builtin_malloc_test@@YAPEAX_K at Z":
+; CHECK: f2:
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: jmp     malloc                          # TAILCALL
 
-; // Built with: clang-cl.exe /c patchable-prologue-tailcall.cpp /O2 /hotpatch -Xclang -emit-llvm
-;
-; typedef unsigned long long size_t;
-; 
-; extern "C" {
-;     void* mi_new(size_t size);
-;  }
-;
-; void *mi_new_test(size_t count)
-; {
-;     return mi_new(count);
-; }
-; 
-; void *builtin_malloc_test(size_t count)
-; {
-;     return __builtin_malloc(count);
-; }
-
-target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc19.38.33133"
-
-define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
+define ptr @f1(i64 %count) "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @mi_new(i64 noundef %count)
+  %call = tail call ptr @f0(i64 %count)
   ret ptr %call
 }
 
-declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr
+declare ptr @f0(i64)
 
-define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K at Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
+define noalias ptr @f2(i64 %count) "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @malloc(i64 noundef %count)
+  %call = tail call ptr @malloc(i64 %count)
   ret ptr %call
 }
 
-declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #0
+declare noalias ptr @malloc(i64) #0
 
 attributes #0 = { allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
 



More information about the llvm-commits mailing list