[llvm] [LLVM] [X86] Fix integer overflows in frame layout for huge frames (PR #101840)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 3 11:41:26 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Wesley Wiser (wesleywiser)
<details>
<summary>Changes</summary>
Fix 32-bit integer overflows in the X86 target frame layout when dealing with frames larger than 4 GiB. On 64-bit targets, when a frame offset does not fit in a 32-bit displacement, we scavenge a scratch register to hold the correct stack offset for frame locals.
This completes reapplying #<!-- -->84114.
Fixes #<!-- -->48911
Fixes #<!-- -->75944
Fixes #<!-- -->87154
---
Full diff: https://github.com/llvm/llvm-project/pull/101840.diff
8 Files Affected:
- (modified) llvm/lib/CodeGen/PrologEpilogInserter.cpp (+1-1)
- (modified) llvm/lib/Target/X86/X86FrameLowering.cpp (+10-1)
- (modified) llvm/lib/Target/X86/X86RegisterInfo.cpp (+31-6)
- (modified) llvm/lib/Target/X86/X86RegisterInfo.h (+9)
- (added) llvm/test/CodeGen/X86/avx512f-large-stack.ll (+23)
- (modified) llvm/test/CodeGen/X86/huge-stack-offset.ll (+13-29)
- (modified) llvm/test/CodeGen/X86/huge-stack.ll (+6-5)
- (modified) llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll (+1-1)
``````````diff
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cd5d877e53d82..db0c12c9a7b5a 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1545,7 +1545,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
- TRI.eliminateFrameIndex(MI, SPAdj, i);
+ TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index bdc9a0d29670a..c28fe2a979910 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
@@ -2616,7 +2617,7 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
// object.
// We need to factor in additional offsets applied during the prologue to the
// frame, base, and stack pointer depending on which is used.
- int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
+ int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t StackSize = MFI.getStackSize();
@@ -4140,6 +4141,14 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
// emitPrologue if it gets called and emits CFI.
MF.setHasWinCFI(false);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ // If the frame is big enough that we might need to scavenge a register to
+ // handle huge offsets, reserve a stack slot for that now.
+ if (STI.is64Bit() && !isInt<32>(MFI.estimateStackSize(MF))) {
+ int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false);
+ RS->addScavengingFrameIndex(FI);
+ }
+
// If we are using Windows x64 CFI, ensure that the stack is always 8 byte
// aligned. The format doesn't support misaligned stack adjustments.
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 555ede9e95403..099007a044e96 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86RegisterInfo.h"
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86FrameLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
@@ -24,6 +25,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TileShapeInfo.h"
@@ -894,7 +896,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
// Determine base register and offset.
- int FIOffset;
+ int64_t FIOffset;
Register BasePtr;
if (MI.isReturn()) {
assert((!hasStackRealignment(MF) ||
@@ -945,11 +947,34 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
if (MI.getOperand(FIOperandNum+3).isImm()) {
- // Offset is a 32-bit integer.
- int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
- int Offset = FIOffset + Imm;
- assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
- "Requesting 64-bit offset in 32-bit immediate!");
+ int64_t Imm = MI.getOperand(FIOperandNum + 3).getImm();
+ int64_t Offset = FIOffset + Imm;
+ bool FitsIn32Bits = isInt<32>(Offset);
+ // If the offset will not fit in a 32-bit displacement,
+ // then for 64-bit targets, scavenge a register to hold it.
+ // Otherwise, for 32-bit targets, this is a bug!
+ if (Is64Bit && !FitsIn32Bits) {
+ assert(RS && "RegisterScavenger was NULL");
+ const X86InstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
+ RS->enterBasicBlockEnd(MBB);
+ RS->backward(std::next(II));
+
+ Register ScratchReg =
+ RS->scavengeRegisterBackwards(X86::GR64RegClass, II, false, 0, true);
+ assert(ScratchReg != 0 && "scratch reg was 0");
+ RS->setRegUsed(ScratchReg);
+
+ BuildMI(MBB, II, DL, TII->get(X86::MOV64ri), ScratchReg).addImm(Offset);
+
+ MI.getOperand(FIOperandNum + 3).setImm(0);
+ MI.getOperand(FIOperandNum + 2).setReg(ScratchReg);
+
+ return false;
+ } else if (!Is64Bit) {
+ assert(FitsIn32Bits && "Requesting 64-bit offset in 32-bit immediate!");
+ }
if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
} else {
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 7296a5f021e4a..7de1d9ea67fd4 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -13,6 +13,8 @@
#ifndef LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
#define LLVM_LIB_TARGET_X86_X86REGISTERINFO_H
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
@@ -176,6 +178,13 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // We need to register scavenge if the frame is very large.
+ return Is64Bit && !isInt<32>(MFI.estimateStackSize(MF));
+ }
};
} // End llvm namespace
diff --git a/llvm/test/CodeGen/X86/avx512f-large-stack.ll b/llvm/test/CodeGen/X86/avx512f-large-stack.ll
new file mode 100644
index 0000000000000..3cb5391c56abf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512f-large-stack.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 4
+; RUN: llc -O0 -mtriple=x86_64 -mattr=+avx512f -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+define void @f(i16 %LGV2, i1 %LGV3) {
+; CHECK-LABEL: f:
+; CHECK: # %bb.0: # %BB
+; CHECK-NEXT: subq $2147483528, %rsp # imm = 0x7FFFFF88
+; CHECK-NEXT: .cfi_def_cfa_offset 2147483536
+; CHECK-NEXT: movb %sil, %cl
+; CHECK-NEXT: movw %di, %ax
+; CHECK-NEXT: movswq %ax, %rax
+; CHECK-NEXT: andb $1, %cl
+; CHECK-NEXT: movabsq $-2147483768, %rdx # imm = 0xFFFFFFFF7FFFFF88
+; CHECK-NEXT: movb %cl, (%rsp,%rdx)
+; CHECK-NEXT: addq $2147483528, %rsp # imm = 0x7FFFFF88
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+BB:
+ %A = alloca i1, i33 2147483648, align 1
+ %G = getelementptr i1, ptr %A, i16 %LGV2
+ %G4 = getelementptr i1, ptr %G, i32 -2147483648
+ store i1 %LGV3, ptr %G4, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/huge-stack-offset.ll b/llvm/test/CodeGen/X86/huge-stack-offset.ll
index e825328ccd89a..707401fc39e2c 100644
--- a/llvm/test/CodeGen/X86/huge-stack-offset.ll
+++ b/llvm/test/CodeGen/X86/huge-stack-offset.ll
@@ -1,23 +1,15 @@
-; RUN: llc < %s -mtriple=x86_64-linux-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-64
-; RUN: llc < %s -mtriple=i386-linux-unknown -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-32
+; RUN: llc < %s -mtriple=x86_64-linux-unknown -verify-machineinstrs | FileCheck %s
; Test that a large stack offset uses a single add/sub instruction to
; adjust the stack pointer.
define void @foo() nounwind {
-; CHECK-64-LABEL: foo:
-; CHECK-64: movabsq $50000000{{..}}, %rax
-; CHECK-64-NEXT: subq %rax, %rsp
-; CHECK-64-NOT: subq $2147483647, %rsp
-; CHECK-64: movabsq $50000000{{..}}, [[RAX:%r..]]
-; CHECK-64-NEXT: addq [[RAX]], %rsp
-
-; CHECK-32-LABEL: foo:
-; CHECK-32: movl $50000000{{..}}, %eax
-; CHECK-32-NEXT: subl %eax, %esp
-; CHECK-32-NOT: subl $2147483647, %esp
-; CHECK-32: movl $50000000{{..}}, [[EAX:%e..]]
-; CHECK-32-NEXT: addl [[EAX]], %esp
+; CHECK--LABEL: foo:
+; CHECK: movabsq $50000000{{..}}, %rax
+; CHECK-NEXT: subq %rax, %rsp
+; CHECK-NOT: subq $2147483647, %rsp
+; CHECK: movabsq $50000000{{..}}, [[RAX:%r..]]
+; CHECK-NEXT: addq [[RAX]], %rsp
%1 = alloca [5000000000 x i8], align 16
call void @bar(ptr %1)
ret void
@@ -26,13 +18,9 @@ define void @foo() nounwind {
; Verify that we do not clobber the return value.
define i32 @foo2() nounwind {
-; CHECK-64-LABEL: foo2:
-; CHECK-64: movl $10, %eax
-; CHECK-64-NOT: movabsq ${{.*}}, %rax
-
-; CHECK-32-LABEL: foo2:
-; CHECK-32: movl $10, %eax
-; CHECK-32-NOT: movl ${{.*}}, %eax
+; CHECK-LABEL: foo2:
+; CHECK: movl $10, %eax
+; CHECK-NOT: movabsq ${{.*}}, %rax
%1 = alloca [5000000000 x i8], align 16
call void @bar(ptr %1)
ret i32 10
@@ -41,13 +29,9 @@ define i32 @foo2() nounwind {
; Verify that we do not clobber EAX when using inreg attribute
define i32 @foo3(i32 inreg %x) nounwind {
-; CHECK-64-LABEL: foo3:
-; CHECK-64: movabsq $50000000{{..}}, %rax
-; CHECK-64-NEXT: subq %rax, %rsp
-
-; CHECK-32-LABEL: foo3:
-; CHECK-32: subl $2147483647, %esp
-; CHECK-32-NOT: movl ${{.*}}, %eax
+; CHECK-LABEL: foo3:
+; CHECK: movabsq $50000000{{..}}, %rax
+; CHECK-NEXT: subq %rax, %rsp
%1 = alloca [5000000000 x i8], align 16
call void @bar(ptr %1)
ret i32 %x
diff --git a/llvm/test/CodeGen/X86/huge-stack.ll b/llvm/test/CodeGen/X86/huge-stack.ll
index 920033ba1182c..4672efbccd01b 100644
--- a/llvm/test/CodeGen/X86/huge-stack.ll
+++ b/llvm/test/CodeGen/X86/huge-stack.ll
@@ -5,12 +5,13 @@
define void @foo() unnamed_addr #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
-; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E
+; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88
; CHECK-NEXT: subq %rax, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 8589934470
-; CHECK-NEXT: movb $42, -129(%rsp)
-; CHECK-NEXT: movb $43, -128(%rsp)
-; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E
+; CHECK-NEXT: .cfi_def_cfa_offset 8589934480
+; CHECK-NEXT: movabsq $4294967177, %rax # imm = 0xFFFFFF89
+; CHECK-NEXT: movb $42, (%rsp,%rax)
+; CHECK-NEXT: movb $43, -118(%rsp)
+; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88
; CHECK-NEXT: addq %rax, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll
index 9555ce032db90..732fc6543e314 100644
--- a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll
+++ b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll
@@ -10,5 +10,5 @@ start:
attributes #0 = { nonlazybind uwtable "probe-stack"="probe_stack" "target-cpu"="x86-64" }
; CHECK-LABEL: foo:
-; CHECK: movabsq $4294967304, %rax
+; CHECK: movabsq $4294967312, %rax
; CHECK-NEXT: callq probe_stack
``````````
</details>
https://github.com/llvm/llvm-project/pull/101840
More information about the llvm-commits mailing list