[llvm] [WIP] Aarch64: Always emit .seh_endprologue (PR #158173)
Fabrice de Gans via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 18:01:46 PDT 2025
https://github.com/Steelskin created https://github.com/llvm/llvm-project/pull/158173
In some cases, with very simple thunks, the `.seh_endprologue` directive is not emitted. This causes an assembler error because the epilogue (`.seh_startepilogue`) ends up starting before the prologue has ended.
From 546dee61c861b3fea4b846b4c809fff5925c236b Mon Sep 17 00:00:00 2001
From: Fabrice de Gans <fabrice at thebrowser.company>
Date: Thu, 11 Sep 2025 16:55:35 -0700
Subject: [PATCH] Aarch64: Always emit .seh_endprologue
In some cases, with very simple thunks, it is possible that the
`.seh_endprologue` is not emitted. This causes issues in the assembler
because the epilogue ends up starting before the prologue has ended.
---
.../AArch64/AArch64PrologueEpilogue.cpp | 8 +-
.../AArch64/seh-minimal-prologue-epilogue.ll | 85 +++++++++++++++++++
2 files changed, 90 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/seh-minimal-prologue-epilogue.ll
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index af424987b8ddb..967dc675a1f30 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -341,7 +341,9 @@ void AArch64PrologueEmitter::emitPrologue() {
// The very last FrameSetup instruction indicates the end of prologue. Emit a
// SEH opcode indicating the prologue end.
- if (NeedsWinCFI && HasWinCFI) {
+ // Always emit SEH_PrologEnd if we need WinCFI, even if no other SEH instructions
+ // were emitted, to ensure .seh_endprologue appears before .seh_startepilogue.
+ if (NeedsWinCFI) {
BuildMI(MBB, AfterSVESavesI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -463,7 +465,7 @@ void AArch64PrologueEmitter::emitEmptyStackFramePrologue(
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
if (!NumBytes) {
- if (NeedsWinCFI && HasWinCFI) {
+ if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -538,7 +540,7 @@ void AArch64PrologueEmitter::emitFramePointerSetup(
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
StackOffset::getFixed(FPOffset), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (NeedsWinCFI && HasWinCFI) {
+ if (NeedsWinCFI) {
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
.setMIFlag(MachineInstr::FrameSetup);
// After setting up the FP, the rest of the prolog doesn't need to be
diff --git a/llvm/test/CodeGen/AArch64/seh-minimal-prologue-epilogue.ll b/llvm/test/CodeGen/AArch64/seh-minimal-prologue-epilogue.ll
new file mode 100644
index 0000000000000..f6e9301698f6d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/seh-minimal-prologue-epilogue.ll
@@ -0,0 +1,85 @@
+; RUN: llc -mtriple=aarch64-windows %s -o - | FileCheck %s
+
+; This test verifies that functions requiring Windows CFI that have minimal
+; or no prologue instructions still emit proper SEH directives, specifically
+; ensuring .seh_endprologue is emitted before .seh_startepilogue.
+;
+; This reproduces the issue where Swift async functions with swifttailcc
+; calling convention would fail with:
+; "error: starting epilogue (.seh_startepilogue) before prologue has ended (.seh_endprologue)"
+
+; Test 1: Swift-style tail call function with minimal prologue
+define swifttailcc void @test_swifttailcc_minimal(ptr %async_ctx, ptr %arg1, ptr %arg2) {
+; CHECK-LABEL: test_swifttailcc_minimal:
+; CHECK-NEXT: .seh_proc test_swifttailcc_minimal
+; CHECK: .seh_endprologue
+; CHECK: .seh_startepilogue
+; CHECK: .seh_endepilogue
+; CHECK: .seh_endproc
+entry:
+ %ptr1 = getelementptr inbounds i8, ptr %async_ctx, i64 16
+ %ptr2 = getelementptr inbounds i8, ptr %async_ctx, i64 24
+ store ptr %arg1, ptr %ptr1, align 8
+ store ptr %arg2, ptr %ptr2, align 8
+ musttail call swifttailcc void @external_swift_function(ptr %async_ctx, ptr %arg1)
+ ret void
+}
+
+; Test 2: Regular function with no stack frame but needs epilogue
+define void @test_no_stack_frame() {
+; CHECK-LABEL: test_no_stack_frame:
+; CHECK-NEXT: .seh_proc test_no_stack_frame
+; CHECK: .seh_endprologue
+; CHECK: .seh_startepilogue
+; CHECK: .seh_endepilogue
+; CHECK: .seh_endproc
+entry:
+ call void @external_function()
+ ret void
+}
+
+; Test 3: Function with minimal stack adjustment only in epilogue
+define void @test_minimal_stack_adjust(ptr %ptr) {
+; CHECK-LABEL: test_minimal_stack_adjust:
+; CHECK-NEXT: .seh_proc test_minimal_stack_adjust
+; CHECK: .seh_endprologue
+; CHECK: .seh_startepilogue
+; CHECK: add sp, sp, #16
+; CHECK: .seh_stackalloc 16
+; CHECK: .seh_endepilogue
+; CHECK: .seh_endproc
+entry:
+ %local = alloca i64, align 8
+ store i64 42, ptr %local, align 8
+ %value = load i64, ptr %local, align 8
+ store i64 %value, ptr %ptr, align 8
+ ret void
+}
+
+; Test 4: Function similar to the original failing case
+define linkonce_odr hidden swifttailcc void @test_linkonce_swifttailcc(ptr swiftasync %async_ctx, ptr %arg1, ptr noalias dereferenceable(40) %arg2, ptr %arg3, i64 %value, ptr %arg4, ptr %arg5, ptr %arg6, i1 %flag, ptr %arg7, ptr noalias dereferenceable(40) %arg8) comdat {
+; CHECK-LABEL: test_linkonce_swifttailcc:
+; CHECK-NEXT: .seh_proc test_linkonce_swifttailcc
+; CHECK: .seh_endprologue
+; CHECK: .seh_startepilogue
+; CHECK: .seh_endepilogue
+; CHECK: .seh_endproc
+entry:
+ %frame_ptr = getelementptr inbounds nuw i8, ptr %async_ctx, i64 16
+ %ctx1 = getelementptr inbounds nuw i8, ptr %async_ctx, i64 400
+ %ctx2 = getelementptr inbounds nuw i8, ptr %async_ctx, i64 1168
+ %spill1 = getelementptr inbounds nuw i8, ptr %async_ctx, i64 2392
+ store ptr %arg8, ptr %spill1, align 8
+ %spill2 = getelementptr inbounds nuw i8, ptr %async_ctx, i64 2384
+ store ptr %arg7, ptr %spill2, align 8
+ %spill3 = getelementptr inbounds nuw i8, ptr %async_ctx, i64 2225
+ store i1 %flag, ptr %spill3, align 1
+ %spill4 = getelementptr inbounds nuw i8, ptr %async_ctx, i64 2376
+ store ptr %arg6, ptr %spill4, align 8
+ musttail call swifttailcc void @external_swift_continuation(ptr swiftasync %async_ctx, i64 0, i64 0)
+ ret void
+}
+
+declare swifttailcc void @external_swift_function(ptr, ptr)
+declare swifttailcc void @external_swift_continuation(ptr swiftasync, i64, i64)
+declare void @external_function()
More information about the llvm-commits mailing list