[llvm] [X86] Don't fold very large offsets into addr displacements during ISel (PR #121678)

Thu Jan 16 19:21:18 PST 2025

https://github.com/wesleywiser updated https://github.com/llvm/llvm-project/pull/121678

>From 7f78b6c3acfd97a2fee8123e7ba2a6b3a545ad9f Mon Sep 17 00:00:00 2001
From: Wesley Wiser <wwiser at gmail.com>
Date: Sun, 5 Jan 2025 03:48:18 +0000
Subject: [PATCH 1/2] [nfc] Add regression test for large offsets in X86 ISel

---
 llvm/test/CodeGen/X86/dag-large-offset.ll | 46 +++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/dag-large-offset.ll

diff --git a/llvm/test/CodeGen/X86/dag-large-offset.ll b/llvm/test/CodeGen/X86/dag-large-offset.ll
new file mode 100644
index 00000000000000..fe674d20a180fc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/dag-large-offset.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i386 --frame-pointer=all | FileCheck %s
+
+; ISel will try to fold pointer arithmetic into the address displacement. However, we don't
+; want to do that if the offset is very close to the expressible limit because the final frame
+; layout may push it over/under the limit.
+
+define i32 @foo(i1 %b) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    .cfi_offset %ebp, -8
+; CHECK-NEXT:    movl %esp, %ebp
+; CHECK-NEXT:    .cfi_def_cfa_register %ebp
+; CHECK-NEXT:    subl $8, %esp
+; CHECK-NEXT:    movl __stack_chk_guard, %eax
+; CHECK-NEXT:    movl %eax, -4(%ebp)
+; CHECK-NEXT:    testb $1, 8(%ebp)
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    leal 2147483644(%ebp), %eax
+; CHECK-NEXT:  .LBB0_3: # %entry
+; CHECK-NEXT:    movl __stack_chk_guard, %ecx
+; CHECK-NEXT:    cmpl -4(%ebp), %ecx
+; CHECK-NEXT:    jne .LBB0_5
+; CHECK-NEXT:  # %bb.4: # %entry
+; CHECK-NEXT:    addl $8, %esp
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    .cfi_def_cfa %esp, 4
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB0_5: # %entry
+; CHECK-NEXT:    .cfi_def_cfa %ebp, 8
+; CHECK-NEXT:    calll __stack_chk_fail
+entry:
+  %a = alloca i8, align 1
+  %0 = ptrtoint ptr %a to i32
+  %sub = add i32 %0, -2147483647
+  %retval.0 = select i1 %b, i32 %sub, i32 0
+  ret i32 %retval.0
+}
+
+attributes #0 = { sspreq }

>From 56142a5b6fdfa8a3f8d51d208d984e69bb9e7523 Mon Sep 17 00:00:00 2001
From: Wesley Wiser <wwiser at gmail.com>
Date: Sun, 5 Jan 2025 04:22:34 +0000
Subject: [PATCH 2/2] [X86] Don't fold very large offsets into addr
 displacements during ISel

Doing so can cause the resulting displacement after frame layout to
become inexpressible (or cause over/underflow currently during frame
layout).
---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp   | 18 +++++++++++-------
 llvm/test/CodeGen/X86/dag-large-offset.ll |  3 ++-
 llvm/test/CodeGen/X86/xor-lea.ll          |  3 ++-
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b340a778b36ad..84bcdae520885e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1800,10 +1800,10 @@ void X86DAGToDAGISel::emitFunctionEntryCode() {
     emitSpecialCodeForMain();
 }
 
-static bool isDispSafeForFrameIndex(int64_t Val) {
-  // On 64-bit platforms, we can run into an issue where a frame index
+static bool isDispSafeForFrameIndexOrRegBase(int64_t Val) {
+  // We can run into an issue where a frame index or a register base
   // includes a displacement that, when added to the explicit displacement,
-  // will overflow the displacement field. Assuming that the frame index
+  // will overflow the displacement field. Assuming that the
   // displacement fits into a 31-bit integer  (which is only slightly more
   // aggressive than the current fundamental assumption that it fits into
   // a 32-bit integer), a 31-bit disp should always be safe.
@@ -1831,7 +1831,7 @@ bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
     // In addition to the checks required for a register base, check that
     // we do not try to use an unsafe Disp with a frame index.
     if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
-        !isDispSafeForFrameIndex(Val))
+        !isDispSafeForFrameIndexOrRegBase(Val))
       return true;
     // In ILP32 (x32) mode, pointers are 32 bits and need to be zero-extended to
     // 64 bits. Instructions with 32-bit register addresses perform this zero
@@ -1849,10 +1849,14 @@ bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
     // to get an address size override to be emitted. However, this
     // pseudo-register is not part of any register class and therefore causes
     // MIR verification to fail.
-    if (Subtarget->isTarget64BitILP32() && !isUInt<31>(Val) &&
+    if (Subtarget->isTarget64BitILP32() &&
+        !isDispSafeForFrameIndexOrRegBase((uint32_t)Val) &&
         !AM.hasBaseOrIndexReg())
       return true;
-  }
+  } else if (AM.hasBaseOrIndexReg() && !isDispSafeForFrameIndexOrRegBase(Val))
+    // For 32-bit X86, make sure the displacement still isn't close to the
+    // expressible limit.
+    return true;
   AM.Disp = Val;
   return false;
 }
@@ -2553,7 +2557,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
   case ISD::FrameIndex:
     if (AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() == nullptr &&
-        (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
+        (!Subtarget->is64Bit() || isDispSafeForFrameIndexOrRegBase(AM.Disp))) {
       AM.BaseType = X86ISelAddressMode::FrameIndexBase;
       AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
       return false;
diff --git a/llvm/test/CodeGen/X86/dag-large-offset.ll b/llvm/test/CodeGen/X86/dag-large-offset.ll
index fe674d20a180fc..2774a939931539 100644
--- a/llvm/test/CodeGen/X86/dag-large-offset.ll
+++ b/llvm/test/CodeGen/X86/dag-large-offset.ll
@@ -22,7 +22,8 @@ define i32 @foo(i1 %b) #0 {
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    jmp .LBB0_3
 ; CHECK-NEXT:  .LBB0_1:
-; CHECK-NEXT:    leal 2147483644(%ebp), %eax
+; CHECK-NEXT:    movl $-2147483647, %eax # imm = 0x80000001
+; CHECK-NEXT:    leal -5(%ebp,%eax), %eax
 ; CHECK-NEXT:  .LBB0_3: # %entry
 ; CHECK-NEXT:    movl __stack_chk_guard, %ecx
 ; CHECK-NEXT:    cmpl -4(%ebp), %ecx
diff --git a/llvm/test/CodeGen/X86/xor-lea.ll b/llvm/test/CodeGen/X86/xor-lea.ll
index 10e9525a2706a3..d50752e48d293f 100644
--- a/llvm/test/CodeGen/X86/xor-lea.ll
+++ b/llvm/test/CodeGen/X86/xor-lea.ll
@@ -327,7 +327,8 @@ define i32 @xor_shl_sminval_i32(i32 %x) {
 ; X86-LABEL: xor_shl_sminval_i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    leal -2147483648(,%eax,8), %eax
+; X86-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
+; X86-NEXT:    leal (%ecx,%eax,8), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: xor_shl_sminval_i32: