[llvm] [win][x64] Guarantee shape of tail call to a control flow guard function (PR #174108)
Daniel Paoliello via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 31 13:38:53 PST 2025
https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/174108
From 0babb94cdde22a7b77781d277166d026f95f17e8 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Tue, 23 Dec 2025 10:00:41 -0800
Subject: [PATCH] [win][x64] Guarantee shape of tail call to a control flow
guard function
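
Guarantee that x64 calls and tail calls to Control Flow Guard functions
(for example, via __guard_dispatch_icall_fptr) are emitted as a single
memory-indirect instruction with a RIP-relative operand, such as
"callq *__guard_dispatch_icall_fptr(%rip)", rather than as a load of the
guard function pointer into a register followed by a register-indirect
call or jump.

To do this, add the X86ISD::CALL_GLOBALADDR and
X86ISD::TC_RETURN_GLOBALADDR nodes and the corresponding
CALL64m_GlobalAddress and TCRETURNmi64_GlobalAddr pseudo instructions.
LowerCall selects these for calls to Control Flow Guard functions, and
pseudo expansion later lowers them to CALL64m and TAILJMPm64_REX
respectively.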
---
llvm/include/llvm/Transforms/CFGuard.h | 2 +
llvm/lib/Target/X86/X86AsmPrinter.cpp | 3 +-
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 32 ++++++++++++-
llvm/lib/Target/X86/X86FrameLowering.cpp | 3 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +
llvm/lib/Target/X86/X86ISelLowering.h | 8 ++++
llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 23 +++++++++-
llvm/lib/Target/X86/X86InstrControl.td | 9 ++++
llvm/lib/Target/X86/X86InstrFragments.td | 7 +++
llvm/lib/Target/X86/X86RegisterInfo.cpp | 1 +
llvm/lib/Transforms/CFGuard/CFGuard.cpp | 5 ++
.../Inputs/reference_x86_vocab_print.txt | 1 +
.../reference_x86_vocab_wo=0.5_print.txt | 1 +
llvm/test/CodeGen/X86/cfguard-checks.ll | 46 +++++++++++++++----
14 files changed, 131 insertions(+), 12 deletions(-)
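
For context, here is a minimal sketch (not part of this patch) of the kind
of IR the cfguard-checks.ll test below exercises; the function names and
the exact pre-patch codegen shown in the comments are illustrative:

  declare void @target_func()

  define void @dispatch_example() {
  entry:
    %fp = alloca ptr, align 8
    store ptr @target_func, ptr %fp, align 8
    %0 = load ptr, ptr %fp, align 8
    call void %0()
    ret void
    ; With the cfguard module flag set, the indirect call is routed through
    ; __guard_dispatch_icall_fptr; after this patch, the x64 lowering must
    ; keep the memory-indirect form:
    ;   callq *__guard_dispatch_icall_fptr(%rip)
    ; instead of the previously possible:
    ;   movq __guard_dispatch_icall_fptr(%rip), %rcx
    ;   callq *%rcx
  }

  !llvm.module.flags = !{!0}
  !0 = !{i32 2, !"cfguard", i32 2}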
diff --git a/llvm/include/llvm/Transforms/CFGuard.h b/llvm/include/llvm/Transforms/CFGuard.h
index b81db8f487965..62e1195c8222e 100644
--- a/llvm/include/llvm/Transforms/CFGuard.h
+++ b/llvm/include/llvm/Transforms/CFGuard.h
@@ -15,6 +15,7 @@
namespace llvm {
+class CallBase;
class FunctionPass;
class GlobalValue;
@@ -35,6 +36,7 @@ FunctionPass *createCFGuardCheckPass();
/// Insert Control Flow Guard dispatches on indirect function calls.
FunctionPass *createCFGuardDispatchPass();
+bool isCFGuardCall(const CallBase *CB);
bool isCFGuardFunction(const GlobalValue *GV);
} // namespace llvm
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 84b921222a116..0275d3179cb60 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -480,7 +480,8 @@ static bool isIndirectBranchOrTailCall(const MachineInstr &MI) {
Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNmi ||
Opc == X86::TCRETURN_WINmi64 || Opc == X86::TCRETURNri64 ||
Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURNri64_ImpCall ||
- Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX;
+ Opc == X86::TAILJMPr64_REX || Opc == X86::TAILJMPm64_REX ||
+ Opc == X86::TCRETURNmi64_GlobalAddr;
}
void X86AsmPrinter::emitBasicBlockEnd(const MachineBasicBlock &MBB) {
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 6a18086cae29f..1d62c6f378e1d 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -63,6 +63,8 @@ class X86ExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI);
void expandCALL_RVMARKER(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ void expandCallToGlobalAddr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandMBB(MachineBasicBlock &MBB);
@@ -254,6 +256,20 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
std::next(RtCall->getIterator()));
}
+void X86ExpandPseudo::expandCallToGlobalAddr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ // Expand CALL64m_GlobalAddress pseudo to CALL64m.
+ MachineInstr &MI = *MBBI;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64m))
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addGlobalAddress(MI.getOperand(0).getGlobal(), 0,
+ MI.getOperand(0).getTargetFlags())
+ .addReg(0);
+ MI.eraseFromParent();
+}
+
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
@@ -277,7 +293,8 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::TCRETURNri64:
case X86::TCRETURNri64_ImpCall:
case X86::TCRETURNmi64:
- case X86::TCRETURN_WINmi64: {
+ case X86::TCRETURN_WINmi64:
+ case X86::TCRETURNmi64_GlobalAddr: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64 ||
Opcode == X86::TCRETURN_WINmi64;
MachineOperand &JumpTarget = MBBI->getOperand(0);
@@ -358,6 +375,16 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL,
TII->get(IsX64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
.add(JumpTarget);
+ } else if (Opcode == X86::TCRETURNmi64_GlobalAddr) {
+ assert(IsX64 &&
+ "TCRETURNmi_GlobalAddr is currently only supported on x64");
+ BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPm64_REX))
+ .addReg(X86::RIP)
+ .addImm(1)
+ .addReg(0)
+ .addGlobalAddress(JumpTarget.getGlobal(), 0,
+ JumpTarget.getTargetFlags())
+ .addReg(0);
} else {
assert(!IsX64 && "Win64 and UEFI64 require REX for indirect jumps.");
JumpTarget.setIsKill();
@@ -710,6 +737,9 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::CALL64r_ImpCall:
MI.setDesc(TII->get(X86::CALL64r));
return true;
+ case X86::CALL64m_GlobalAddress:
+ expandCallToGlobalAddr(MBB, MBBI);
+ return true;
case X86::ADD32mi_ND:
case X86::ADD64mi32_ND:
case X86::SUB32mi_ND:
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 8bca6344d6521..832c05952e776 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2401,7 +2401,8 @@ static bool isTailCallOpcode(unsigned Opc) {
Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
- Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64;
+ Opc == X86::TCRETURNmi64 || Opc == X86::TCRETURN_WINmi64 ||
+ Opc == X86::TCRETURNmi64_GlobalAddr;
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3f1db1a500e05..ec9fe31e82ddf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35657,6 +35657,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(CVTTP2UIS)
NODE_NAME_CASE(MCVTTP2UIS)
NODE_NAME_CASE(POP_FROM_X87_REG)
+ NODE_NAME_CASE(TC_RETURN_GLOBALADDR)
+ NODE_NAME_CASE(CALL_GLOBALADDR)
}
return nullptr;
#undef NODE_NAME_CASE
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index a528c311975d8..c7a7888aaeede 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -81,6 +81,10 @@ namespace llvm {
// marker instruction.
CALL_RVMARKER,
+ // Pseudo for a call to a global address that must be called via a memory
+ // address (i.e., not loaded into a register and then called).
+ CALL_GLOBALADDR,
+
/// The same as ISD::CopyFromReg except that this node makes it explicit
/// that it may lower to an x87 FPU stack pop. Optimizations should be more
/// cautious when handling this node than a normal CopyFromReg to avoid
@@ -336,6 +340,10 @@ namespace llvm {
/// the list of operands.
TC_RETURN,
+ // Pseudo for a tail-call return to a global address that must be called
+ // via a memory address (i.e., not loaded into a register and then called).
+ TC_RETURN_GLOBALADDR,
+
// Vector move to low scalar and zero higher vector elements.
VZEXT_MOVL,
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index ae9d0a162011f..ac97308919f20 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
+#include "llvm/Transforms/CFGuard.h"
#define DEBUG_TYPE "x86-isel"
@@ -2431,6 +2432,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
bool IsImpCall = false;
+ bool IsCFGuardCall = false;
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
@@ -2448,6 +2450,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee.getValueType() == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
+ } else if (Is64Bit && CB && isCFGuardCall(CB)) {
+ // Use a dedicated pseudo instruction for calls to Control Flow Guard
+ // functions to guarantee the shape of the instruction used for the call.
+ // To do this, we need to unwrap the load now and use the CFG function's
+ // GlobalValue as the callee.
+ IsCFGuardCall = true;
+ auto LoadNode = cast<LoadSDNode>(Callee);
+ GlobalAddressSDNode *GA =
+ cast<GlobalAddressSDNode>(unwrapAddress(LoadNode->getBasePtr()));
+ assert(isCFGuardFunction(GA->getGlobal()) &&
+ "CFG Call should be to a guard function");
+ assert(LoadNode->getOffset()->isUndef() &&
+ "CFG Function load should not have an offset");
+ Callee = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), dl, GA->getValueType(0), 0, X86II::MO_NO_FLAG);
}
SmallVector<SDValue, 8> Ops;
@@ -2552,7 +2569,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
MF.getFrameInfo().setHasTailCall();
- SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops);
+ auto Opcode =
+ IsCFGuardCall ? X86ISD::TC_RETURN_GLOBALADDR : X86ISD::TC_RETURN;
+ SDValue Ret = DAG.getNode(Opcode, dl, MVT::Other, Ops);
if (IsCFICall)
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
@@ -2568,6 +2587,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
} else if (IsNoTrackIndirectCall) {
Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
+ } else if (IsCFGuardCall) {
+ Chain = DAG.getNode(X86ISD::CALL_GLOBALADDR, dl, NodeTys, Ops);
} else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
// Calls with a "clang.arc.attachedcall" bundle are special. They should be
// expanded to the call, directly followed by a special marker sequence and
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index e8527cd73abb5..d6bf39dc7b964 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -376,6 +376,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
(ins i64mem_w64TC:$dst, i32imm:$offset),
[]>, Sched<[WriteJumpLd]>;
+ let isPseudo = 1, mayLoad = 1 in
+ def TCRETURNmi64_GlobalAddr : PseudoI<(outs),
+ (ins i64imm:$dst, i32imm:$offset),
+ [(X86tcret_globaladdr tglobaladdr:$dst, timm:$offset)]>, Sched<[WriteJumpLd]>;
+
def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_brtarget:$dst),
[]>, Sched<[WriteJump]>;
@@ -436,6 +441,10 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
def CALL64r_ImpCall :
PseudoI<(outs), (ins GR64_A:$dst), [(X86call GR64_A:$dst)]>,
Requires<[In64BitMode,NotUseIndirectThunkCalls,ImportCallOptimizationEnabled]>;
+
+ def CALL64m_GlobalAddress :
+ PseudoI<(outs), (ins i64imm:$dst), [(X86call_globaladdr tglobaladdr:$dst)]>,
+ Requires<[In64BitMode]>;
}
// Conditional tail calls are similar to the above, but they are branches
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index 116986a0fffea..38ab02667317e 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -210,6 +210,10 @@ def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+def X86call_globaladdr : SDNode<"X86ISD::CALL_GLOBALADDR", SDT_X86Call,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
+ SDNPVariadic]>;
+
def X86imp_call : SDNode<"X86ISD::IMP_CALL", SDT_X86Call,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
@@ -259,6 +263,9 @@ def X86eh_sjlj_setup_dispatch : SDNode<"X86ISD::EH_SJLJ_SETUP_DISPATCH",
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def X86tcret_globaladdr : SDNode<"X86ISD::TC_RETURN_GLOBALADDR", SDT_X86TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 72f38133e21ff..9000c84378d75 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -986,6 +986,7 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg(
case X86::TCRETURNri64_ImpCall:
case X86::TCRETURNmi64:
case X86::TCRETURN_WINmi64:
+ case X86::TCRETURNmi64_GlobalAddr:
case X86::EH_RETURN:
case X86::EH_RETURN64: {
LiveRegUnits LRU(*this);
diff --git a/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
index 46456706d46a1..5c2d9ddaa76db 100644
--- a/llvm/lib/Transforms/CFGuard/CFGuard.cpp
+++ b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
@@ -313,6 +313,11 @@ FunctionPass *llvm::createCFGuardDispatchPass() {
return new CFGuard(CFGuardPass::Mechanism::Dispatch);
}
+bool llvm::isCFGuardCall(const CallBase *CB) {
+ return CB->getCallingConv() == CallingConv::CFGuard_Check ||
+ CB->countOperandBundlesOfType(LLVMContext::OB_cfguardtarget);
+}
+
bool llvm::isCFGuardFunction(const GlobalValue *GV) {
if (GV->getLinkage() != GlobalValue::ExternalLinkage)
return false;
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
index 62e07445ad12e..a7826fce09dbc 100644
--- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
@@ -1711,6 +1711,7 @@ Key: TAILJMPm: [ 0.00 0.00 ]
Key: TAILJMPr: [ 0.00 0.00 ]
Key: TCMMIMFP: [ 0.00 0.00 ]
Key: TCMMRLFP: [ 0.00 0.00 ]
+Key: TCRETURN_CFG: [ 0.00 0.00 ]
Key: TCRETURN_HIPE: [ 0.00 0.00 ]
Key: TCRETURN_WIN: [ 0.00 0.00 ]
Key: TCRETURN_WINmi: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
index 03a3fafc6b801..0849ca20c0d7f 100644
--- a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
@@ -1711,6 +1711,7 @@ Key: TAILJMPm: [ 0.00 0.00 ]
Key: TAILJMPr: [ 0.00 0.00 ]
Key: TCMMIMFP: [ 0.00 0.00 ]
Key: TCMMRLFP: [ 0.00 0.00 ]
+Key: TCRETURN_CFG: [ 0.00 0.00 ]
Key: TCRETURN_HIPE: [ 0.00 0.00 ]
Key: TCRETURN_WIN: [ 0.00 0.00 ]
Key: TCRETURN_WINmi: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/X86/cfguard-checks.ll b/llvm/test/CodeGen/X86/cfguard-checks.ll
index 3a2de718e8a1b..2013e9e66dbf4 100644
--- a/llvm/test/CodeGen/X86/cfguard-checks.ll
+++ b/llvm/test/CodeGen/X86/cfguard-checks.ll
@@ -56,8 +56,7 @@ entry:
; On x86_64, __guard_dispatch_icall_fptr tail calls the function, so there should be only one call instruction.
; X64-LABEL: func_optnone_cf
; X64: leaq target_func(%rip), %rax
- ; X64: movq __guard_dispatch_icall_fptr(%rip), %rcx
- ; X64: callq *%rcx
+ ; X64: callq *__guard_dispatch_icall_fptr(%rip)
; X64-NOT: callq
}
attributes #1 = { noinline optnone }
@@ -125,6 +124,38 @@ lpad: ; preds = %entry
declare void @h()
+; Regression test: even if the invoke has many arguments, we should call via
+; a RIP-relative memory operand, rather than loading the CFG func into a register.
+define i32 @invoke_many_args(ptr %0, ptr %1, ptr %2) personality ptr @h {
+ %4 = alloca ptr, align 8
+ %5 = alloca ptr, align 8
+ %6 = alloca ptr, align 8
+ invoke void %0(ptr %1, ptr %2, ptr %4, ptr %5, ptr %6)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ ret i32 2
+
+lpad:
+ %tmp = landingpad { ptr, i32 }
+ catch ptr null
+ ret i32 -1
+
+ ; On i686, the call to __guard_check_icall_fptr should come immediately before the call to the target function.
+ ; X86-LABEL: invoke_many_args
+ ; X86: calll *___guard_check_icall_fptr
+ ; X86_MINGW-NEXT: Ltmp3:
+ ; X86: calll *%ecx
+ ; X86: # %invoke.cont
+ ; X86: # %lpad
+
+ ; On x86_64, __guard_dispatch_icall_fptr tail calls the function, so there should be only one call instruction.
+ ; X64-LABEL: invoke_many_args
+ ; X64: callq *__guard_dispatch_icall_fptr(%rip)
+ ; X64-NOT: callq
+ ; X64: # %invoke.cont
+ ; X64: # %lpad
+}
; Test that Control Flow Guard preserves floating point arguments.
declare double @target_func_doubles(double, double, double, double)
@@ -152,10 +183,10 @@ entry:
; X64_MSVC: movsd __real at 4000000000000000(%rip), %xmm1
; X64_MSVC: movsd __real at 4008000000000000(%rip), %xmm2
; X64_MSVC: movsd __real at 4010000000000000(%rip), %xmm3
- ; X64_MINGW: movsd .LCPI4_0(%rip), %xmm0
- ; X64_MINGW: movsd .LCPI4_1(%rip), %xmm1
- ; X64_MINGW: movsd .LCPI4_2(%rip), %xmm2
- ; X64_MINGW: movsd .LCPI4_3(%rip), %xmm3
+ ; X64_MINGW: movsd .LCPI5_0(%rip), %xmm0
+ ; X64_MINGW: movsd .LCPI5_1(%rip), %xmm1
+ ; X64_MINGW: movsd .LCPI5_2(%rip), %xmm2
+ ; X64_MINGW: movsd .LCPI5_3(%rip), %xmm3
; X64: callq *__guard_dispatch_icall_fptr(%rip)
; X64-NOT: callq
@@ -213,8 +244,7 @@ entry:
; X64-LABEL: vmptr_thunk:
; X64: movq (%rcx), %rax
; X64-NEXT: movq 8(%rax), %rax
- ; X64-NEXT: movq __guard_dispatch_icall_fptr(%rip), %rdx
- ; X64-NEXT: rex64 jmpq *%rdx # TAILCALL
+ ; X64-NEXT: rex64 jmpq *__guard_dispatch_icall_fptr(%rip) # TAILCALL
; X64-NOT: callq
}