[llvm] [win][x64] Allow push/pop for stack alloc when unwind v2 is required (PR #153621)

Daniel Paoliello via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 14 16:28:55 PDT 2025


https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/153621

>From 5223af5bd2f12e7e238af4d6bc100023c99882d3 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Wed, 13 Aug 2025 14:58:50 -0700
Subject: [PATCH] [win][x64] Allow push/pop for stack alloc when unwind v2
 is required

---
 llvm/lib/Target/X86/X86WinEHUnwindV2.cpp      |  72 ++++---
 .../CodeGen/X86/win64-eh-unwindv2-errors.mir  |  67 +++---
 ...win64-eh-unwindv2-push-pop-stack-alloc.mir | 199 ++++++++++++++++++
 3 files changed, 272 insertions(+), 66 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/win64-eh-unwindv2-push-pop-stack-alloc.mir

diff --git a/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp b/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp
index e9081a4ae4e72..7640d7090949c 100644
--- a/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp
+++ b/llvm/lib/Target/X86/X86WinEHUnwindV2.cpp
@@ -201,15 +201,11 @@ bool X86WinEHUnwindV2::runOnMachineFunction(MachineFunction &MF) {
                 "The epilog is deallocating a stack "
                 "allocation, but the prolog did "
                 "not allocate one");
-          if (HasStackDealloc)
+          if (PoppedRegCount > 0)
             return rejectCurrentFunctionInternalError(
                 MF, Mode,
-                "The epilog is deallocating the stack "
-                "allocation more than once");
-          if (PoppedRegCount > 0)
-            llvm_unreachable(
-                "Should have raised an error: either popping before "
-                "deallocating or deallocating without an allocation");
+                "The epilog is deallocating a stack allocation after popping "
+                "registers");
 
           HasStackDealloc = true;
         } else if (State == FunctionState::FinishedEpilog)
@@ -219,33 +215,41 @@ bool X86WinEHUnwindV2::runOnMachineFunction(MachineFunction &MF) {
 
       case X86::POP64r:
         if (State == FunctionState::InEpilog) {
-          // After the stack pointer has been adjusted, the epilog must
-          // POP each register in reverse order of the PUSHes in the prolog.
-          PoppedRegCount++;
-          if (HasStackAlloc != HasStackDealloc)
-            return rejectCurrentFunctionInternalError(
-                MF, Mode,
-                "Cannot pop registers before the stack "
-                "allocation has been deallocated");
-          if (PoppedRegCount > PushedRegs.size())
-            return rejectCurrentFunctionInternalError(
-                MF, Mode,
-                "The epilog is popping more registers than the prolog pushed");
-          if (PushedRegs[PushedRegs.size() - PoppedRegCount] !=
-              MI.getOperand(0).getReg())
-            return rejectCurrentFunctionInternalError(
-                MF, Mode,
-                "The epilog is popping a registers in "
-                "a different order than the "
-                "prolog pushed them");
-
-          // Unwind v2 records the size of the epilog not from where we place
-          // SEH_BeginEpilogue (as that contains the instruction to adjust the
-          // stack pointer) but from the first POP instruction (if there is
-          // one).
-          if (!UnwindV2StartLocation) {
-            assert(PoppedRegCount == 1);
-            UnwindV2StartLocation = &MI;
+          Register Reg = MI.getOperand(0).getReg();
+          if (HasStackAlloc && (PoppedRegCount == 0) &&
+              !llvm::is_contained(PushedRegs, Reg)) {
+            // If this is a pop that doesn't correspond to the set of pushed
+            // registers, then assume it was used to adjust the stack pointer.
+            HasStackDealloc = true;
+          } else {
+            // After the stack pointer has been adjusted, the epilog must
+            // POP each register in reverse order of the PUSHes in the prolog.
+            PoppedRegCount++;
+            if (HasStackAlloc != HasStackDealloc)
+              return rejectCurrentFunctionInternalError(
+                  MF, Mode,
+                  "Cannot pop registers before the stack "
+                  "allocation has been deallocated");
+            if (PoppedRegCount > PushedRegs.size())
+              return rejectCurrentFunctionInternalError(
+                  MF, Mode,
+                  "The epilog is popping more registers than the prolog "
+                  "pushed");
+            if (PushedRegs[PushedRegs.size() - PoppedRegCount] != Reg)
+              return rejectCurrentFunctionInternalError(
+                  MF, Mode,
+                  "The epilog is popping a registers in "
+                  "a different order than the "
+                  "prolog pushed them");
+
+            // Unwind v2 records the size of the epilog not from where we place
+            // SEH_BeginEpilogue (as that contains the instruction to adjust the
+            // stack pointer) but from the first POP instruction (if there is
+            // one).
+            if (!UnwindV2StartLocation) {
+              assert(PoppedRegCount == 1);
+              UnwindV2StartLocation = &MI;
+            }
           }
         } else if (State == FunctionState::FinishedEpilog)
           // Unexpected instruction after the epilog.
diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir
index f099d4fddcb33..ed97e52f2d5c5 100644
--- a/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir
+++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir
@@ -97,38 +97,6 @@ body:             |
     RET64
 ...
 
-;--- double_dealloc.mir
-# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - %t/double_dealloc.mir \
-# RUN:    -run-pass=x86-wineh-unwindv2 2>&1 | FileCheck %s \
-# RUN:    --check-prefix=DOUBLE-DEALLOC
-# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/double_dealloc.mir \
-# RUN:    -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \
-# RUN:    FileCheck %s --check-prefix=BESTEFFORT
-# DOUBLE-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'double_dealloc':
-# DOUBLE-DEALLOC-SAME: The epilog is deallocating the stack allocation more than once
-
---- |
-  define dso_local void @double_dealloc() local_unnamed_addr {
-  entry:
-    ret void
-  }
-  !llvm.module.flags = !{!0}
-  !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
-...
----
-name:            double_dealloc
-body:             |
-  bb.0.entry:
-    $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags
-    frame-setup SEH_StackAlloc 40
-    frame-setup SEH_EndPrologue
-    SEH_BeginEpilogue
-    $rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags
-    $rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags
-    SEH_EndEpilogue
-    RET64
-...
-
 ;--- dealloc_after_epilog.mir
 # RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
 # RUN:    %t/dealloc_after_epilog.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
@@ -316,3 +284,38 @@ body:             |
     $ecx = MOV32rr killed $eax
     RET64
 ...
+
+;--- dealloc_pop_dealloc.mir
+# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
+# RUN:    %t/dealloc_pop_dealloc.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
+# RUN:    FileCheck %s --check-prefix=DEALLOC-POP-DEALLOC
+# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/dealloc_pop_dealloc.mir \
+# RUN:    -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \
+# RUN:    FileCheck %s --check-prefix=BESTEFFORT
+# DEALLOC-POP-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'dealloc_pop_dealloc':
+# DEALLOC-POP-DEALLOC-SAME: The epilog is deallocating a stack allocation after popping registers
+
+--- |
+  define dso_local void @dealloc_pop_dealloc() local_unnamed_addr {
+  entry:
+    ret void
+  }
+  !llvm.module.flags = !{!0}
+  !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
+...
+---
+name:            dealloc_pop_dealloc
+body:             |
+  bb.0.entry:
+    frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_PushReg 55
+    $rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags
+    frame-setup SEH_StackAlloc 40
+    frame-setup SEH_EndPrologue
+    SEH_BeginEpilogue
+    $rsp = frame-destroy ADD64ri32 $rsp, 20, implicit-def dead $eflags
+    $rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    $rsp = frame-destroy ADD64ri32 $rsp, 20, implicit-def dead $eflags
+    SEH_EndEpilogue
+    RET64
+...
diff --git a/llvm/test/CodeGen/X86/win64-eh-unwindv2-push-pop-stack-alloc.mir b/llvm/test/CodeGen/X86/win64-eh-unwindv2-push-pop-stack-alloc.mir
new file mode 100644
index 0000000000000..09a839f00814a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/win64-eh-unwindv2-push-pop-stack-alloc.mir
@@ -0,0 +1,199 @@
+# RUN: llc -o - %s -mtriple=x86_64-unknown-windows-msvc \
+# RUN:  -run-pass=x86-wineh-unwindv2 | FileCheck %s
+
+# Regression test for Win x64 unwind v2: in some cases it is better to use
+# push+pop to adjust the stack, rather than sub+add. This is permitted with
+# unwind v2 as the requirement is that the epilog finishes adjusting the stack
+# before popping the registers listed in the unwind table.
+
+# Pushes and pops the same register.
+# CHECK-LABEL:  name:            push_pop_same
+# CHECK:        body:
+# CHECK-NEXT:     bb.0
+# CHECK-NEXT:       SEH_UnwindVersion 2
+# CHECK-NEXT:       frame-setup PUSH64r undef $rax
+# CHECK-NEXT:       frame-setup SEH_StackAlloc 8
+# CHECK-NEXT:       frame-setup SEH_EndPrologue
+# CHECK-NEXT:       SEH_BeginEpilogue
+# CHECK-NEXT:       $rax = frame-destroy
+# CHECK-NEXT:       SEH_UnwindV2Start
+# CHECK-NEXT:       SEH_EndEpilogue
+# CHECK-NEXT:       RET64
+
+# Pushes and pops a different register.
+# CHECK-LABEL:  name:            push_pop_different
+# CHECK:        body:
+# CHECK-NEXT:     bb.0
+# CHECK-NEXT:       SEH_UnwindVersion 2
+# CHECK-NEXT:       frame-setup PUSH64r undef $rax
+# CHECK-NEXT:       frame-setup SEH_StackAlloc 8
+# CHECK-NEXT:       frame-setup SEH_EndPrologue
+# CHECK:            SEH_BeginEpilogue
+# CHECK-NEXT:       $rcx = frame-destroy POP64r
+# CHECK-NEXT:       SEH_UnwindV2Start
+# CHECK-NEXT:       SEH_EndEpilogue
+# CHECK-NEXT:       RET64 $eax
+
+# Pushes in the prolog, adds in the epilog.
+# CHECK-LABEL:  name:            push_add
+# CHECK:        body:
+# CHECK-NEXT:     bb.0
+# CHECK-NEXT:       SEH_UnwindVersion 2
+# CHECK-NEXT:       frame-setup PUSH64r killed $r15
+# CHECK-NEXT:       frame-setup SEH_PushReg 126
+# CHECK-NEXT:       frame-setup PUSH64r killed $r14
+# CHECK-NEXT:       frame-setup SEH_PushReg 125
+# CHECK-NEXT:       frame-setup PUSH64r killed $rsi
+# CHECK-NEXT:       frame-setup SEH_PushReg 60
+# CHECK-NEXT:       frame-setup PUSH64r killed $rdi
+# CHECK-NEXT:       frame-setup SEH_PushReg 55
+# CHECK-NEXT:       frame-setup PUSH64r killed $rbx
+# CHECK-NEXT:       frame-setup SEH_PushReg 53
+# CHECK-NEXT:       frame-setup PUSH64r undef $rax
+# CHECK-NEXT:       frame-setup SEH_StackAlloc 8
+# CHECK-NEXT:       frame-setup SEH_EndPrologue
+# CHECK:            SEH_BeginEpilogue
+# CHECK-NEXT:       $rsp = frame-destroy ADD64ri32 $rsp, 8
+# CHECK-NEXT:       SEH_UnwindV2Start
+# CHECK-NEXT:       $rbx = frame-destroy POP64r
+# CHECK-NEXT:       $rdi = frame-destroy POP64r
+# CHECK-NEXT:       $rsi = frame-destroy POP64r
+# CHECK-NEXT:       $r14 = frame-destroy POP64r
+# CHECK-NEXT:       $r15 = frame-destroy POP64r
+# CHECK-NEXT:       SEH_EndEpilogue
+# CHECK-NEXT:       RET64
+
+--- |
+  define void @push_pop_same() {
+    %small_alloca = alloca i32, align 4
+    ret void
+  }
+  
+  define i32 @push_pop_different(i32 %x, i32 %y) {
+    %small_alloca = alloca i32, align 4
+    %sum = add i32 %x, %y
+    ret i32 %sum
+  }
+  
+  define void @push_add(ptr %a, ptr %b, ptr %out) {
+    %small_alloca = alloca i32, align 4
+    %av = load i256, ptr %a, align 16
+    %bv = load i256, ptr %b, align 16
+    %r = mul i256 %av, %bv
+    store i256 %r, ptr %out, align 16
+    ret void
+  }
+  
+  !llvm.module.flags = !{!0}
+  
+  !0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
+...
+---
+name:            push_pop_same
+body:             |
+  bb.0 (%ir-block.0):
+    frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_StackAlloc 8
+    frame-setup SEH_EndPrologue
+    SEH_BeginEpilogue
+    $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    SEH_EndEpilogue
+    RET64
+...
+---
+name:            push_pop_different
+body:             |
+  bb.0 (%ir-block.0):
+    frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_StackAlloc 8
+    frame-setup SEH_EndPrologue
+    renamable $edx = KILL $edx, implicit-def $rdx
+    renamable $ecx = KILL $ecx, implicit-def $rcx
+    renamable $eax = LEA64_32r killed renamable $rcx, 1, killed renamable $rdx, 0, $noreg
+    SEH_BeginEpilogue
+    $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    SEH_EndEpilogue
+    RET64 $eax
+...
+---
+name:            push_add
+body:             |
+  bb.0 (%ir-block.0):
+
+    frame-setup PUSH64r killed $r15, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_PushReg 126
+    frame-setup PUSH64r killed $r14, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_PushReg 125
+    frame-setup PUSH64r killed $rsi, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_PushReg 60
+    frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_PushReg 55
+    frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_PushReg 53
+    frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
+    frame-setup SEH_StackAlloc 8
+    frame-setup SEH_EndPrologue
+    $rsi = MOV64rr $rdx
+    renamable $r9 = MOV64rm renamable $rcx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.a, align 16)
+    renamable $rdi = MOV64rm renamable $rcx, 1, $noreg, 8, $noreg :: (load (s64) from %ir.a + 8, basealign 16)
+    renamable $rbx = MOV64rm renamable $rcx, 1, $noreg, 16, $noreg :: (load (s64) from %ir.a + 16, align 16)
+    renamable $r10 = MOV64rm $rdx, 1, $noreg, 16, $noreg :: (load (s64) from %ir.b + 16, align 16)
+    renamable $r11 = MOV64rm $rdx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.b, align 16)
+    renamable $r14 = MOV64rm $rdx, 1, $noreg, 8, $noreg :: (load (s64) from %ir.b + 8, basealign 16)
+    renamable $r15 = MOV64rm killed renamable $rcx, 1, $noreg, 24, $noreg :: (load (s64) from %ir.a + 24, basealign 16)
+    renamable $r15 = IMUL64rr killed renamable $r15, renamable $r11, implicit-def dead $eflags
+    $rax = MOV64rr $r11
+    MUL64r renamable $rbx, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
+    $rcx = MOV64rr $rax
+    renamable $rbx = IMUL64rr killed renamable $rbx, renamable $r14, implicit-def dead $eflags
+    renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $rdx, implicit-def dead $eflags
+    renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $r15, implicit-def dead $eflags
+    $r15 = MOV64rr $r10
+    renamable $r15 = IMUL64rr killed renamable $r15, renamable $rdi, implicit-def dead $eflags
+    $rax = MOV64rr killed $r10
+    MUL64r renamable $r9, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
+    $r10 = MOV64rr $rax
+    renamable $rdx = ADD64rr killed renamable $rdx, killed renamable $r15, implicit-def dead $eflags
+    renamable $r15 = MOV64rm killed renamable $rsi, 1, $noreg, 24, $noreg :: (load (s64) from %ir.b + 24, basealign 16)
+    renamable $r15 = IMUL64rr killed renamable $r15, renamable $r9, implicit-def dead $eflags
+    renamable $r15 = ADD64rr killed renamable $r15, killed renamable $rdx, implicit-def dead $eflags
+    renamable $r10 = ADD64rr killed renamable $r10, killed renamable $rcx, implicit-def $eflags
+    renamable $r15 = ADC64rr killed renamable $r15, killed renamable $rbx, implicit-def dead $eflags, implicit killed $eflags
+    $rax = MOV64rr $r9
+    MUL64r renamable $r11, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
+    $rcx = MOV64rr $rdx
+    $rsi = MOV64rr $rax
+    $rax = MOV64rr $rdi
+    MUL64r killed renamable $r11, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
+    $r11 = MOV64rr $rdx
+    $rbx = MOV64rr $rax
+    renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $rcx, implicit-def $eflags
+    renamable $r11 = ADC64ri32 killed renamable $r11, 0, implicit-def dead $eflags, implicit killed $eflags
+    $rax = MOV64rr killed $r9
+    MUL64r renamable $r14, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
+    $rcx = MOV64rr $rdx
+    $r9 = MOV64rr $rax
+    renamable $r9 = ADD64rr killed renamable $r9, killed renamable $rbx, implicit-def $eflags
+    renamable $rcx = ADC64rr killed renamable $rcx, killed renamable $r11, implicit-def $eflags, implicit killed $eflags
+    renamable $al = SETCCr 2, implicit killed $eflags
+    renamable $r11d = MOVZX32rr8 killed renamable $al, implicit-def $r11
+    $rax = MOV64rr killed $rdi
+    MUL64r killed renamable $r14, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
+    renamable $rax = ADD64rr killed renamable $rax, killed renamable $rcx, implicit-def $eflags
+    renamable $rdx = ADC64rr killed renamable $rdx, killed renamable $r11, implicit-def dead $eflags, implicit killed $eflags
+    renamable $rax = ADD64rr killed renamable $rax, killed renamable $r10, implicit-def $eflags
+    renamable $rdx = ADC64rr killed renamable $rdx, killed renamable $r15, implicit-def dead $eflags, implicit killed $eflags
+    MOV64mr renamable $r8, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store (s64) into %ir.out, align 16)
+    MOV64mr renamable $r8, 1, $noreg, 8, $noreg, killed renamable $r9 :: (store (s64) into %ir.out + 8, basealign 16)
+    MOV64mr renamable $r8, 1, $noreg, 16, $noreg, killed renamable $rax :: (store (s64) into %ir.out + 16, align 16)
+    MOV64mr killed renamable $r8, 1, $noreg, 24, $noreg, killed renamable $rdx :: (store (s64) into %ir.out + 24, basealign 16)
+    SEH_BeginEpilogue
+    $rsp = frame-destroy ADD64ri32 $rsp, 8, implicit-def dead $eflags
+    $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    $rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    $rsi = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    $r15 = frame-destroy POP64r implicit-def $rsp, implicit $rsp
+    SEH_EndEpilogue
+    RET64
+...



More information about the llvm-commits mailing list