[clang] [llvm] [win][x64] Fix import call optimization for calls to dllimports and global function pointers (PR #160604)
Daniel Paoliello via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 14:51:45 PST 2025
https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/160604
>From 78a7bf052fa49eaf9a566aa5b109754300c77a51 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Fri, 12 Dec 2025 09:45:37 -0800
Subject: [PATCH] [win][x64] Fix import call optimization for calls to
dllimports and global function pointers
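
Previously, with import call optimization enabled, a call to a dllimport
function was lowered to a direct `call __imp_a`, i.e., a call to the IAT
entry itself rather than an indirect call through it. Add a CALL64_ImpCall
pseudo that expands to CALL64m so the call loads its target from the IAT,
as shown by the updated checks in win-import-call-optimization.ll
(AT&T syntax):

Before:
    rex64
    callq __imp_a
    nopl 8(%rax,%rax)

After:
    rex64
    callq *__imp_a(%rip)
    nopl (%rax,%rax)

Also teach FastISel to copy indirect call targets into RAX (instead of
bailing out of fast-isel entirely), and restrict the impcall-specific
call and tail-call patterns to modules without CF Guard, since CF Guard
calls dispatch through __guard_dispatch_icall_fptr.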
---
.../CodeGenCXX/microsoft-abi-eh-ip2state.cpp | 2 +-
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 19 +++++
llvm/lib/Target/X86/X86FastISel.cpp | 16 ++--
llvm/lib/Target/X86/X86InstrCompiler.td | 13 ++--
llvm/lib/Target/X86/X86InstrControl.td | 10 ++-
llvm/lib/Target/X86/X86InstrPredicates.td | 6 +-
llvm/lib/Target/X86/X86MCInstLower.cpp | 55 +++++++++-----
.../win-import-call-optimization-cfguard.ll | 75 ++++++++++++++++---
.../win-import-call-optimization-jumptable.ll | 1 +
.../X86/win-import-call-optimization.ll | 56 ++++++++++----
10 files changed, 189 insertions(+), 64 deletions(-)
diff --git a/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp b/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
index 0b7b406e2ba8e..541789fc9d339 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
@@ -40,7 +40,7 @@ void case_calls_dll_import() NO_TAIL {
// CHECK: .seh_endprologue
// CHECK: .Limpcall{{[0-9]+}}:
// CHECK-NEXT: rex64
-// CHECK-NEXT: call __imp_some_dll_import
+// CHECK-NEXT: call qword ptr [rip + __imp_some_dll_import]
// CHECK-NEXT: nop dword ptr {{\[.*\]}}
// CHECK-NEXT: nop
// CHECK-NEXT: .seh_startepilogue
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 6a18086cae29f..749fc45bffa50 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -63,6 +63,8 @@ class X86ExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI);
void expandCALL_RVMARKER(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ void expandCALL_ImpCall(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandMBB(MachineBasicBlock &MBB);
@@ -254,6 +256,20 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
std::next(RtCall->getIterator()));
}
+void X86ExpandPseudo::expandCALL_ImpCall(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ // Expand CALL64_ImpCall pseudo to CALL64m.
+ MachineInstr &MI = *MBBI;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64m))
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addGlobalAddress(MI.getOperand(0).getGlobal(), 0,
+ MI.getOperand(0).getTargetFlags())
+ .addReg(0);
+ MI.eraseFromParent();
+}
+
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
@@ -710,6 +726,9 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::CALL64r_ImpCall:
MI.setDesc(TII->get(X86::CALL64r));
return true;
+ case X86::CALL64_ImpCall:
+ expandCALL_ImpCall(MBB, MBBI);
+ return true;
case X86::ADD32mi_ND:
case X86::ADD64mi32_ND:
case X86::SUB32mi_ND:
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index f007886115d35..5f8830469e893 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3317,11 +3317,6 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (Flag.isSwiftError() || Flag.isPreallocated())
return false;
- // Can't handle import call optimization.
- if (Is64Bit &&
- MF->getFunction().getParent()->getModuleFlag("import-call-optimization"))
- return false;
-
SmallVector<MVT, 16> OutVTs;
SmallVector<Type *, 16> ArgTys;
SmallVector<Register, 16> ArgRegs;
@@ -3563,6 +3558,17 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (CalleeOp) {
// Register-indirect call.
unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
+
+ const Module *M = FuncInfo.MF->getFunction().getParent();
+ if (CalleeOp != X86::RAX && Is64Bit &&
+ M->getModuleFlag("import-call-optimization")) {
+ // Import call optimization requires all indirect calls to be via RAX.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
+ TII.get(TargetOpcode::COPY), X86::RAX)
+ .addReg(CalleeOp);
+ CalleeOp = X86::RAX;
+ }
+
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CallOpc))
.addReg(CalleeOp);
} else {
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index ec31675731b79..e4f80af56dee3 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1322,9 +1322,6 @@ def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
(CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;
-def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>;
-
// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
// can never use callee-saved registers. That is the purpose of the GR64_TC
// register classes.
@@ -1359,25 +1356,25 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off),
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>,
- Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
+ Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
def : Pat<(X86tcret GR64_TCW64:$dst, timm:$off),
(TCRETURN_WIN64ri GR64_TCW64:$dst, timm:$off)>,
- Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
+ Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri64_ImpCall ptr_rc_tailcall:$dst, timm:$off)>,
- Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabled]>;
+ Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabledAndCFGuardDisabled]>;
// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
(TCRETURNmi64 addr:$dst, timm:$off)>,
- Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls]>;
+ Requires<[In64BitMode, IsNotWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
(TCRETURN_WINmi64 addr:$dst, timm:$off)>,
- Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls]>;
+ Requires<[IsWin64CCFunc, NotUseIndirectThunkCalls, ImportCallOptimizationDisabledOrCFGuardEnabled]>;
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index e8527cd73abb5..6a0cbfb52dce4 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -331,11 +331,11 @@ let isCall = 1, Uses = [RSP, SSP], SchedRW = [WriteJump] in {
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
"call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationDisabled]>;
+ Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationDisabledOrCFGuardEnabled]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
"call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[In64BitMode,FavorMemIndirectCall,
- NotUseIndirectThunkCalls]>;
+ NotUseIndirectThunkCalls,ImportCallOptimizationDisabledOrCFGuardEnabled]>;
// Non-tracking calls for IBT, use with caution.
let isCodeGenOnly = 1 in {
@@ -433,9 +433,13 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
PseudoI<(outs), (ins i64imm:$rvfunc, i64i32imm_brtarget:$dst), []>,
Requires<[In64BitMode]>;
+ def CALL64_ImpCall :
+ PseudoI<(outs), (ins i64imm:$dst), [(X86imp_call tglobaladdr:$dst)]>,
+ Requires<[In64BitMode]>;
+
def CALL64r_ImpCall :
PseudoI<(outs), (ins GR64_A:$dst), [(X86call GR64_A:$dst)]>,
- Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabled]>;
+ Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabledAndCFGuardDisabled]>;
}
// Conditional tail calls are similar to the above, but they are branches
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 98104a6fad1a9..cebb312b7d167 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -233,8 +233,10 @@ let RecomputePerFunction = 1 in {
"shouldOptForSize(MF)">;
def NoSSE41_Or_OptForSize : Predicate<"shouldOptForSize(MF) || "
"!Subtarget->hasSSE41()">;
- def ImportCallOptimizationEnabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
- def ImportCallOptimizationDisabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
+ def ImportCallOptimizationEnabledAndCFGuardDisabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\") &&"
+ "!MF->getFunction().getParent()->getModuleFlag(\"cfguard\")">;
+ def ImportCallOptimizationDisabledOrCFGuardEnabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\") ||"
+ "MF->getFunction().getParent()->getModuleFlag(\"cfguard\")">;
def IsWin64CCFunc : Predicate<"Subtarget->isCallingConvWin64(MF->getFunction().getCallingConv())">;
def IsNotWin64CCFunc : Predicate<"!Subtarget->isCallingConvWin64(MF->getFunction().getCallingConv())">;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 713d504474f5e..529e07c8511dc 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -2343,7 +2343,8 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
case X86::TAILJMPr64_REX: {
if (EnableImportCallOptimization) {
- assert(MI->getOperand(0).getReg() == X86::RAX &&
+ assert((MI->getOperand(0).getReg() == X86::RAX ||
+ MF->getFunction().getParent()->getModuleFlag("cfguard")) &&
"Indirect tail calls with impcall enabled must go through RAX (as "
"enforced by TCRETURNImpCallri64)");
emitLabelAndRecordForImportCallOptimization(
@@ -2544,28 +2545,18 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
if (IndCSPrefix && MI->hasRegisterImplicitUseOperand(X86::R11))
EmitAndCountInstruction(MCInstBuilder(X86::CS_PREFIX));
- if (EnableImportCallOptimization && isImportedFunction(MI->getOperand(0))) {
- emitLabelAndRecordForImportCallOptimization(
- IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
-
- MCInst TmpInst;
- MCInstLowering.Lower(MI, TmpInst);
-
- // For Import Call Optimization to work, we need a the call instruction
- // with a rex prefix, and a 5-byte nop after the call instruction.
- EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
- emitCallInstruction(TmpInst);
- emitNop(*OutStreamer, 5, Subtarget);
- maybeEmitNopAfterCallForWindowsEH(MI);
- return;
- }
+ assert((!EnableImportCallOptimization ||
+ !isImportedFunction(MI->getOperand(0))) &&
+ "Calls to imported functions with import call optimization "
+ "should be lowered to CALL64m via CALL64_ImpCall");
break;
case X86::CALL64r:
if (EnableImportCallOptimization) {
assert(MI->getOperand(0).getReg() == X86::RAX &&
- "Indirect calls with impcall enabled must go through RAX (as "
+ "Indirect calls with import call optimization enabled must go "
+ "through RAX (as "
"enforced by CALL64r_ImpCall)");
emitLabelAndRecordForImportCallOptimization(
@@ -2583,9 +2574,33 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
break;
case X86::CALL64m:
- if (EnableImportCallOptimization && isCallToCFGuardFunction(MI)) {
- emitLabelAndRecordForImportCallOptimization(
- IMAGE_RETPOLINE_AMD64_CFG_CALL);
+ if (EnableImportCallOptimization) {
+ if (isCallToCFGuardFunction(MI)) {
+ emitLabelAndRecordForImportCallOptimization(
+ IMAGE_RETPOLINE_AMD64_CFG_CALL);
+ } else if (isImportedFunction(MI->getOperand(3))) {
+ emitLabelAndRecordForImportCallOptimization(
+ IMAGE_RETPOLINE_AMD64_IMPORT_CALL);
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+
+ // For Import Call Optimization to work, we need the call instruction to
+ // have a rex prefix, and a 5-byte nop after the call instruction.
+ EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
+ emitCallInstruction(TmpInst);
+ // The MSVC linker is *very* picky about the exact nop to use.
+ MCInst Nop = MCInstBuilder(X86::NOOPL)
+ .addReg(X86::RAX)
+ .addImm(1)
+ .addReg(X86::RAX)
+ .addImm(0)
+ .addReg(0);
+ Nop.setFlags(X86::IP_USE_DISP8);
+ EmitAndCountInstruction(Nop);
+ maybeEmitNopAfterCallForWindowsEH(MI);
+ return;
+ }
}
break;
diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll b/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll
index 12be910d68ee9..974a5d1e5eba0 100644
--- a/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll
+++ b/llvm/test/CodeGen/X86/win-import-call-optimization-cfguard.ll
@@ -1,13 +1,48 @@
-; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
+
+; FIXME: FastISel is emitting calls to the CFG dispatch function as indirect
+; calls via registers. Normally this would work, but it is the incorrect
+; pattern for Import Call Optimization.
+
+ at global_func_ptr = external dso_local local_unnamed_addr global ptr, align 8
+declare dllimport void @a() local_unnamed_addr
+declare dllimport void @b() local_unnamed_addr
define dso_local void @normal_call(ptr noundef readonly %func_ptr) local_unnamed_addr section "nc_sect" {
entry:
+ call void @a()
+ call void @a()
call void %func_ptr()
+ %0 = load ptr, ptr @global_func_ptr, align 8
+ call void %0()
ret void
}
; CHECK-LABEL: normal_call:
-; CHECK: .Limpcall0:
+; CHECK: movq %rcx, %rsi
+; CHECK-NEXT: .Limpcall0:
+; CHECK-NEXT: rex64
+; CHECK-NEXT: callq *__imp_a(%rip)
+; CHECK-NEXT: nopl (%rax,%rax)
+; CHECK-NEXT: .Limpcall1:
+; CHECK-NEXT: rex64
+; CHECK-NEXT: callq *__imp_a(%rip)
+; CHECK-NEXT: nopl (%rax,%rax)
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: .Limpcall2:
+; CHECK-NEXT: callq *__guard_dispatch_icall_fptr(%rip)
+; CHECK-NEXT: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall3:
; CHECK-NEXT: callq *__guard_dispatch_icall_fptr(%rip)
+; CHECK-NEXT: nop
+
+define dso_local void @tail_call() local_unnamed_addr section "tc_sect" {
+entry:
+ tail call void @b()
+ ret void
+}
+; CHECK-LABEL: tail_call:
+; CHECK: .Limpcall4:
+; CHECK-NEXT: jmp __imp_b
define dso_local void @tail_call_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
entry:
@@ -15,19 +50,41 @@ entry:
ret void
}
; CHECK-LABEL: tail_call_fp:
-; CHECK: .Limpcall1:
+; CHECK: movq %rcx, %rax
+; CHECK-NEXT: .Limpcall5:
; CHECK-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip)
+define dso_local void @tail_call_global_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
+entry:
+ %0 = load ptr, ptr @global_func_ptr, align 8
+ tail call void %0()
+ ret void
+}
+; CHECK-LABEL: tail_call_global_fp:
+; CHECK: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall6:
+; CHECK-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip)
+
; CHECK-LABEL .section .retplne,"yi"
; CHECK-NEXT .asciz "RetpolineV1"
-; CHECK-NEXT .long 16
-; CHECK-NEXT .secnum tc_sect
-; CHECK-NEXT .long 10
-; CHECK-NEXT .secoffset .Limpcall1
-; CHECK-NEXT .long 16
+; CHECK-NEXT .long 40
; CHECK-NEXT .secnum nc_sect
-; CHECK-NEXT .long 9
+; CHECK-NEXT .long 3
; CHECK-NEXT .secoffset .Limpcall0
+; CHECK-NEXT .long 3
+; CHECK-NEXT .secoffset .Limpcall1
+; CHECK-NEXT .long 9
+; CHECK-NEXT .secoffset .Limpcall2
+; CHECK-NEXT .long 9
+; CHECK-NEXT .secoffset .Limpcall3
+; CHECK-NEXT .long 32
+; CHECK-NEXT .secnum tc_sect
+; CHECK-NEXT .long 2
+; CHECK-NEXT .secoffset .Limpcall4
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall5
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall6
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"import-call-optimization", i32 1}
diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll b/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll
index fe22b251685e6..c2389a10415d1 100644
--- a/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll
+++ b/llvm/test/CodeGen/X86/win-import-call-optimization-jumptable.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s
+; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
; CHECK-LABEL: uses_rax:
; CHECK: .Limpcall0:
diff --git a/llvm/test/CodeGen/X86/win-import-call-optimization.ll b/llvm/test/CodeGen/X86/win-import-call-optimization.ll
index cc7e1a9f81e34..9a91baf99c1d5 100644
--- a/llvm/test/CodeGen/X86/win-import-call-optimization.ll
+++ b/llvm/test/CodeGen/X86/win-import-call-optimization.ll
@@ -1,27 +1,36 @@
-; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
-; RUN: llc --global-isel --global-isel-abort=2 -mtriple=x86_64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
+; RUN: llc --fast-isel -mtriple=x86_64-pc-windows-msvc -o - %s | FileCheck %s
+; RUN: llc --global-isel --global-isel-abort=2 -mtriple=x86_64-pc-windows-msvc -o - %s | \
+; RUN: FileCheck %s
+
+ at global_func_ptr = external dso_local local_unnamed_addr global ptr, align 8
define dso_local void @normal_call(ptr noundef readonly %func_ptr) local_unnamed_addr section "nc_sect" {
entry:
call void @a()
call void @a()
call void %func_ptr()
+ %0 = load ptr, ptr @global_func_ptr, align 8
+ call void %0()
ret void
}
; CHECK-LABEL: normal_call:
; CHECK: .Limpcall0:
; CHECK-NEXT: rex64
-; CHECK-NEXT: callq __imp_a
-; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: callq *__imp_a(%rip)
+; CHECK-NEXT: nopl (%rax,%rax)
; CHECK-NEXT: .Limpcall1:
; CHECK-NEXT: rex64
-; CHECK-NEXT: callq __imp_a
-; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: callq *__imp_a(%rip)
+; CHECK-NEXT: nopl (%rax,%rax)
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .Limpcall2:
; CHECK-NEXT: callq *%rax
; CHECK-NEXT: nopl (%rax)
+; CHECK-NEXT: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall3:
+; CHECK-NEXT: callq *%rax
+; CHECK-NEXT: nopl (%rax)
; CHECK-NEXT: nop
define dso_local void @tail_call() local_unnamed_addr section "tc_sect" {
@@ -30,7 +39,7 @@ entry:
ret void
}
; CHECK-LABEL: tail_call:
-; CHECK: .Limpcall3:
+; CHECK: .Limpcall4:
; CHECK-NEXT: jmp __imp_b
define dso_local void @tail_call_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
@@ -40,7 +49,18 @@ entry:
}
; CHECK-LABEL: tail_call_fp:
; CHECK: movq %rcx, %rax
-; CHECK-NEXT: .Limpcall4:
+; CHECK-NEXT: .Limpcall5:
+; CHECK-NEXT: rex64 jmpq *%rax
+
+define dso_local void @tail_call_global_fp(ptr noundef readonly %func_ptr) local_unnamed_addr section "tc_sect" {
+entry:
+ %0 = load ptr, ptr @global_func_ptr, align 8
+ tail call void %0()
+ ret void
+}
+; CHECK-LABEL: tail_call_global_fp:
+; CHECK: movq global_func_ptr(%rip), %rax
+; CHECK-NEXT: .Limpcall6:
; CHECK-NEXT: rex64 jmpq *%rax
declare dllimport void @a() local_unnamed_addr
@@ -48,13 +68,7 @@ declare dllimport void @b() local_unnamed_addr
; CHECK-LABEL .section .retplne,"yi"
; CHECK-NEXT .asciz "RetpolineV1"
-; CHECK-NEXT .long 24
-; CHECK-NEXT .secnum tc_sect
-; CHECK-NEXT .long 3
-; CHECK-NEXT .secoffset .Limpcall3
-; CHECK-NEXT .long 5
-; CHECK-NEXT .secoffset .Limpcall4
-; CHECK-NEXT .long 32
+; CHECK-NEXT .long 40
; CHECK-NEXT .secnum nc_sect
; CHECK-NEXT .long 3
; CHECK-NEXT .secoffset .Limpcall0
@@ -62,6 +76,16 @@ declare dllimport void @b() local_unnamed_addr
; CHECK-NEXT .secoffset .Limpcall1
; CHECK-NEXT .long 5
; CHECK-NEXT .secoffset .Limpcall2
+; CHECK-NEXT .long 5
+; CHECK-NEXT .secoffset .Limpcall3
+; CHECK-NEXT .long 32
+; CHECK-NEXT .secnum tc_sect
+; CHECK-NEXT .long 2
+; CHECK-NEXT .secoffset .Limpcall4
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall5
+; CHECK-NEXT .long 4
+; CHECK-NEXT .secoffset .Limpcall6
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"import-call-optimization", i32 1}