[llvm] [X86] Fix overflow with large stack probes on x86-64 (PR #113219)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 13:47:36 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: None (mconst)
Changes:
When emitting an inline stack probe loop, we can't use SUBri to calculate the loop bound if the loop-bound offset doesn't fit in a 32-bit (possibly sign-extended) immediate.
Fixes #113218.
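
For context, the core of the fix is a range check on the loop-bound offset before choosing an encoding. Here is a minimal standalone sketch of that check, using the same `isUInt` helpers from `llvm/Support/MathExtras.h` that the patch uses; the wrapper function name is illustrative only and not part of the patch:

```cpp
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Illustrative helper (not in the patch): can SUBri encode this offset?
// A 32-bit immediate is sign-extended when paired with a 64-bit register,
// so in that case only 31 usable bits remain.
bool canEncodeAsSubImm(uint64_t BoundOffset, bool Uses64BitFramePtr) {
  return Uses64BitFramePtr ? llvm::isUInt<31>(BoundOffset)
                           : llvm::isUInt<32>(BoundOffset);
}
```

When the check fails on a 64-bit target, the patch materializes the negated offset with MOV64ri and adds the stack pointer instead; on a 32-bit target it reports a resource-limit error, since the frame cannot fit in the address space.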
---
Full diff: https://github.com/llvm/llvm-project/pull/113219.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86FrameLowering.cpp (+35-10)
- (added) llvm/test/CodeGen/X86/stack-clash-huge.ll (+84)
``````````diff
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index a35b04606e595d..a083f5919837f1 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -798,18 +798,43 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
: Is64Bit ? X86::R11D
: X86::EAX;
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
- .addReg(StackPtr)
- .setMIFlag(MachineInstr::FrameSetup);
-
// save loop bound
{
- const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
- const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
- BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
- .addReg(FinalStackProbed)
- .addImm(BoundOffset)
- .setMIFlag(MachineInstr::FrameSetup);
+ const uint64_t BoundOffset = alignDown(Offset, StackProbeSize);
+
+ // Can we calculate the loop bound using SUB with a 32-bit immediate?
+ // Note that the immediate gets sign-extended when used with a 64-bit
+ // register, so in that case we only have 31 bits to work with.
+ bool canUseSub =
+ Uses64BitFramePtr ? isUInt<31>(BoundOffset) : isUInt<32>(BoundOffset);
+
+ if (canUseSub) {
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
+
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
+ .addReg(FinalStackProbed)
+ .addImm(BoundOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else if (Uses64BitFramePtr) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), FinalStackProbed)
+ .addImm(-BoundOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), FinalStackProbed)
+ .addReg(FinalStackProbed)
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ // We're being asked to probe a stack frame that's 4 GiB or larger,
+ // but our stack pointer is only 32 bits.
+ DiagnosticInfoResourceLimit Diag(MF.getFunction(),
+ "probed stack frame size", BoundOffset,
+ 0xffffffff, DS_Error, DK_ResourceLimit);
+ MF.getFunction().getContext().diagnose(Diag);
+ return;
+ }
// while in the loop, use loop-invariant reg for CFI,
// instead of the stack pointer, which changes during the loop
diff --git a/llvm/test/CodeGen/X86/stack-clash-huge.ll b/llvm/test/CodeGen/X86/stack-clash-huge.ll
new file mode 100644
index 00000000000000..03f028dfc25067
--- /dev/null
+++ b/llvm/test/CodeGen/X86/stack-clash-huge.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
+; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
+; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
+; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
+
+define i32 @foo() local_unnamed_addr #0 {
+; CHECK-X64-LABEL: foo:
+; CHECK-X64: # %bb.0:
+; CHECK-X64-NEXT: movabsq $-2399997952, %r11 # imm = 0xFFFFFFFF70F2F000
+; CHECK-X64-NEXT: addq %rsp, %r11
+; CHECK-X64-NEXT: .cfi_def_cfa_register %r11
+; CHECK-X64-NEXT: .cfi_adjust_cfa_offset 2399997952
+; CHECK-X64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X64-NEXT: subq $4096, %rsp # imm = 0x1000
+; CHECK-X64-NEXT: movq $0, (%rsp)
+; CHECK-X64-NEXT: cmpq %r11, %rsp
+; CHECK-X64-NEXT: jne .LBB0_1
+; CHECK-X64-NEXT: # %bb.2:
+; CHECK-X64-NEXT: subq $1928, %rsp # imm = 0x788
+; CHECK-X64-NEXT: .cfi_def_cfa_register %rsp
+; CHECK-X64-NEXT: .cfi_def_cfa_offset 2399999888
+; CHECK-X64-NEXT: movl $1, 264(%rsp)
+; CHECK-X64-NEXT: movl $1, 28664(%rsp)
+; CHECK-X64-NEXT: movl -128(%rsp), %eax
+; CHECK-X64-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788
+; CHECK-X64-NEXT: addq %rcx, %rsp
+; CHECK-X64-NEXT: .cfi_def_cfa_offset 8
+; CHECK-X64-NEXT: retq
+;
+; CHECK-X86-LABEL: foo:
+; CHECK-X86: # %bb.0:
+; CHECK-X86-NEXT: movl %esp, %eax
+; CHECK-X86-NEXT: subl $2399997952, %eax # imm = 0x8F0D1000
+; CHECK-X86-NEXT: .cfi_def_cfa_register %eax
+; CHECK-X86-NEXT: .cfi_adjust_cfa_offset 2399997952
+; CHECK-X86-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X86-NEXT: subl $4096, %esp # imm = 0x1000
+; CHECK-X86-NEXT: movl $0, (%esp)
+; CHECK-X86-NEXT: cmpl %eax, %esp
+; CHECK-X86-NEXT: jne .LBB0_1
+; CHECK-X86-NEXT: # %bb.2:
+; CHECK-X86-NEXT: subl $2060, %esp # imm = 0x80C
+; CHECK-X86-NEXT: .cfi_def_cfa_register %esp
+; CHECK-X86-NEXT: .cfi_def_cfa_offset 2400000016
+; CHECK-X86-NEXT: movl $1, 392(%esp)
+; CHECK-X86-NEXT: movl $1, 28792(%esp)
+; CHECK-X86-NEXT: movl (%esp), %eax
+; CHECK-X86-NEXT: movl $2400000012, %ecx # imm = 0x8F0D180C
+; CHECK-X86-NEXT: addl %ecx, %esp
+; CHECK-X86-NEXT: .cfi_def_cfa_offset 4
+; CHECK-X86-NEXT: retl
+;
+; CHECK-X32-LABEL: foo:
+; CHECK-X32: # %bb.0:
+; CHECK-X32-NEXT: movl %esp, %r11d
+; CHECK-X32-NEXT: subl $2399997952, %r11d # imm = 0x8F0D1000
+; CHECK-X32-NEXT: .cfi_def_cfa_register %r11
+; CHECK-X32-NEXT: .cfi_adjust_cfa_offset 2399997952
+; CHECK-X32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; CHECK-X32-NEXT: subl $4096, %esp # imm = 0x1000
+; CHECK-X32-NEXT: movq $0, (%esp)
+; CHECK-X32-NEXT: cmpl %r11d, %esp
+; CHECK-X32-NEXT: jne .LBB0_1
+; CHECK-X32-NEXT: # %bb.2:
+; CHECK-X32-NEXT: subl $1928, %esp # imm = 0x788
+; CHECK-X32-NEXT: .cfi_def_cfa_register %rsp
+; CHECK-X32-NEXT: .cfi_def_cfa_offset 2399999888
+; CHECK-X32-NEXT: movl $1, 264(%esp)
+; CHECK-X32-NEXT: movl $1, 28664(%esp)
+; CHECK-X32-NEXT: movl -128(%esp), %eax
+; CHECK-X32-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788
+; CHECK-X32-NEXT: addq %rcx, %esp
+; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
+; CHECK-X32-NEXT: retq
+ %a = alloca i32, i64 600000000, align 16
+ %b0 = getelementptr inbounds i32, ptr %a, i64 98
+ %b1 = getelementptr inbounds i32, ptr %a, i64 7198
+ store volatile i32 1, ptr %b0
+ store volatile i32 1, ptr %b1
+ %c = load volatile i32, ptr %a
+ ret i32 %c
+}
+
+attributes #0 = {"probe-stack"="inline-asm"}
``````````
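
The CHECK-X64 lines above show the fallback path in action: the bound (0x8F0D1000, about 2.4 GB) no longer fits a sign-extended 32-bit immediate, so the negated bound is materialized with `movabsq` and added to `%rsp`. A quick standalone sketch of why 31 bits is the cutoff (plain C++, no LLVM APIs; the constants mirror the test above):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // A 32-bit immediate used with a 64-bit register is sign-extended.
  // Values up to 2^31 - 1 survive unchanged; anything larger flips sign.
  uint64_t Fits = 0x7FFFFFFFu;   // isUInt<31>: OK for a sign-extended SUB
  uint64_t TooBig = 0x8F0D1000u; // the ~2.4 GB bound from the test above
  std::printf("%lld\n", (long long)(int64_t)(int32_t)Fits);   // 2147483647
  std::printf("%lld\n", (long long)(int64_t)(int32_t)TooBig); // negative
  return 0;
}
```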
https://github.com/llvm/llvm-project/pull/113219