[llvm] eb3b7dd - X86: Fix win64 tail call regression for tail call to loaded pointer (#158055)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 11 13:12:05 PDT 2025


Author: Matt Arsenault
Date: 2025-09-11T23:12:01+03:00
New Revision: eb3b7ddc697c379894dc9b09b158697d44f7c25b

URL: https://github.com/llvm/llvm-project/commit/eb3b7ddc697c379894dc9b09b158697d44f7c25b
DIFF: https://github.com/llvm/llvm-project/commit/eb3b7ddc697c379894dc9b09b158697d44f7c25b.diff

LOG: X86: Fix win64 tail call regression for tail call to loaded pointer (#158055)

Fix regression after 62f2641d603db9aef99dd5c434a1dfe7d3f56346. The previous
patch handled the register case, but the memory case snuck in another use
of ptr_rc_tailcall hidden inside i64mem_TC.

Added: 
    llvm/test/CodeGen/X86/win64-tailcall-memory.ll

Modified: 
    llvm/lib/Target/X86/X86AsmPrinter.cpp
    llvm/lib/Target/X86/X86ExpandPseudo.cpp
    llvm/lib/Target/X86/X86FrameLowering.cpp
    llvm/lib/Target/X86/X86InstrCompiler.td
    llvm/lib/Target/X86/X86InstrControl.td
    llvm/lib/Target/X86/X86InstrOperands.td
    llvm/lib/Target/X86/X86RegisterInfo.cpp
    llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index ff22ee8c86fac..a7734e9200a19 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -478,9 +478,9 @@ static bool isIndirectBranchOrTailCall(const MachineInstr &MI) {
          Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 ||
          Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
          Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNmi ||
-         Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 ||
-         Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TAILJMPr64_REX ||
-         Opc == X86::TAILJMPm64_REX;
+         Opc == X86::TCRETURN_WINmi64 || Opc == X86::TCRETURNri64 ||
+         Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURNri64_ImpCall ||
+         Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX;
 }
 
 void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {

diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 9457e718de699..4a9b824b0db14 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -276,8 +276,10 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
   case X86::TCRETURNdi64cc:
   case X86::TCRETURNri64:
   case X86::TCRETURNri64_ImpCall:
-  case X86::TCRETURNmi64: {
-    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
+  case X86::TCRETURNmi64:
+  case X86::TCRETURN_WINmi64: {
+    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64 ||
+                 Opcode == X86::TCRETURN_WINmi64;
     MachineOperand &JumpTarget = MBBI->getOperand(0);
     MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
                                                          : 1);
@@ -341,7 +343,8 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
         MIB.addImm(MBBI->getOperand(2).getImm());
       }
 
-    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
+    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64 ||
+               Opcode == X86::TCRETURN_WINmi64) {
       unsigned Op = (Opcode == X86::TCRETURNmi)
                         ? X86::TAILJMPm
                         : (IsX64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);

diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index a293b4c87cfe4..08c9d738baceb 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2402,7 +2402,7 @@ static bool isTailCallOpcode(unsigned Opc) {
          Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
          Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
          Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
-         Opc == X86::TCRETURNmi64;
+         Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64;
 }
 
 void X86FrameLowering::emitEpilogue(MachineFunction &MF,

diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 5a0df058b27f6..af7a33abaf758 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1364,15 +1364,19 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
 // There wouldn't be enough scratch registers for base+index.
 def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
           (TCRETURNmi64 addr:$dst, timm:$off)>,
-          Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
+          Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls]>;
+
+def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
+          (TCRETURN_WINmi64 addr:$dst, timm:$off)>,
+          Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
           (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
-          Requires<[In64BitMode, UseIndirectThunkCalls]>;
+          Requires<[In64BitMode, IsNotWin64CCFunc, UseIndirectThunkCalls]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
           (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, timm:$off)>,
-          Requires<[Not64BitMode, UseIndirectThunkCalls]>;
+          Requires<[Not64BitMode, IsNotWin64CCFunc, UseIndirectThunkCalls]>;
 
 def : Pat<(X86tcret (i64 tglobaladdr:$dst), timm:$off),
           (TCRETURNdi64 tglobaladdr:$dst, timm:$off)>,
@@ -2215,7 +2219,7 @@ let Predicates = [HasZU] in {
   def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
             (SUBREG_TO_REG (i64 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;
 }
-  
+
 // mul reg, imm
 def : Pat<(mul GR16:$src1, imm:$src2),
           (IMUL16rri GR16:$src1, imm:$src2)>;

diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 139aedd473ebc..d962bfff1444d 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -372,6 +372,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
   def TCRETURNmi64   : PseudoI<(outs),
                                (ins i64mem_TC:$dst, i32imm:$offset),
                                []>, Sched<[WriteJumpLd]>;
+  def TCRETURN_WINmi64   : PseudoI<(outs),
+                               (ins i64mem_w64TC:$dst, i32imm:$offset),
+                               []>, Sched<[WriteJumpLd]>;
 
   def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_brtarget:$dst),
                            []>, Sched<[WriteJump]>;

diff --git a/llvm/lib/Target/X86/X86InstrOperands.td b/llvm/lib/Target/X86/X86InstrOperands.td
index 53a6b7c4c4c92..80843f6bb80e6 100644
--- a/llvm/lib/Target/X86/X86InstrOperands.td
+++ b/llvm/lib/Target/X86/X86InstrOperands.td
@@ -141,6 +141,11 @@ def i64mem_TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> {
                        ptr_rc_tailcall, i32imm, SEGMENT_REG);
 }
 
+def i64mem_w64TC : X86MemOperand<"printqwordmem", X86Mem64AsmOperand, 64> {
+  let MIOperandInfo = (ops GR64_TCW64, i8imm,
+                       GR64_TCW64, i32imm, SEGMENT_REG);
+}
+
 // Special parser to detect 16-bit mode to select 16-bit displacement.
 def X86AbsMemMode16AsmOperand : AsmOperandClass {
   let Name = "AbsMemMode16";

diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 9ec04e740a08b..7963dc1b755c9 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1010,6 +1010,7 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg(
   case X86::TCRETURNri64:
   case X86::TCRETURNri64_ImpCall:
   case X86::TCRETURNmi64:
+  case X86::TCRETURN_WINmi64:
   case X86::EH_RETURN:
   case X86::EH_RETURN64: {
     LiveRegUnits LRU(*this);

diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index 4cc456ece77e0..c28de14a97874 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -893,6 +893,7 @@ void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
       case X86::TAILJMPm64_REX:
       case X86::TAILJMPm:
       case X86::TCRETURNmi64:
+      case X86::TCRETURN_WINmi64:
       case X86::TCRETURNmi: {
         // Use the generic unfold logic now that we know we're dealing with
         // expected instructions.

diff --git a/llvm/test/CodeGen/X86/win64-tailcall-memory.ll b/llvm/test/CodeGen/X86/win64-tailcall-memory.ll
new file mode 100644
index 0000000000000..568f4fe04fea9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win64-tailcall-memory.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=x86_64-unknown-windows-gnu < %s | FileCheck %s
+
+; Check calling convention is correct for win64 when doing a tailcall
+; for a pointer loaded from memory.
+
+declare void @foo(i64, ptr)
+
+define void @do_tailcall(ptr %objp) nounwind {
+; CHECK-LABEL: do_tailcall:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rsi
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    callq foo
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rsi
+; CHECK-NEXT:    rex64 jmpq *(%rax) # TAILCALL
+  tail call void @foo(i64 0, ptr null)
+  %fptr = load ptr, ptr %objp, align 8
+  tail call void %fptr(ptr null)
+  ret void
+}
+
+; Make sure aliases of ccc are also treated as win64 functions
+define fastcc void @do_tailcall_fastcc(ptr %objp) nounwind {
+; CHECK-LABEL: do_tailcall_fastcc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rsi
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    callq foo
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    movq %rsi, %rax
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rsi
+; CHECK-NEXT:    rex64 jmpq *(%rax) # TAILCALL
+  tail call void @foo(i64 0, ptr null)
+  %fptr = load ptr, ptr %objp, align 8
+  tail call fastcc void %fptr(ptr null)
+  ret void
+}


        


More information about the llvm-commits mailing list