[llvm] [X86] Recognize POP/ADD/SUB modifying rsp in getSPAdjust. (PR #114265)

Daniel Zabawa via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 08:30:25 PST 2024


https://github.com/daniel-zabawa updated https://github.com/llvm/llvm-project/pull/114265

>From cb1a106044c5ea81539582d5f53db049715621c0 Mon Sep 17 00:00:00 2001
From: "Zabawa, Daniel" <daniel.zabawa at intel.com>
Date: Wed, 30 Oct 2024 09:54:04 -0700
Subject: [PATCH 1/2] [X86] Recognize POP/ADD/SUB modifying rsp in getSPAdjust.

This code assumed only PUSHes would appear in call sequences. However,
if calls require frame-pointer/base-pointer spills, only the PUSH
operations inserted by spillFPBP will be recognized, and the adjustments
to frame object offsets in prologepilog will be incorrect.

This change correctly reports the SP adjustment for POP and ADD/SUB to
rsp, and adds an assertion for unrecognized instructions that modify rsp.

There is no unit test provided as the issue can only be reliably
reproduced in MIR (to force a spill/restore outside and inside a call
sequence, respectively), but MIR cannot serialize the FPClobberedByCall
field that triggers spillFPBP to run.
---
 llvm/lib/Target/X86/X86InstrInfo.cpp | 31 ++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 38ea1f35be2b9a..e2285417a155e4 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -452,10 +452,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
     return -(I->getOperand(1).getImm());
   }
 
-  // Currently handle only PUSHes we can reasonably expect to see
-  // in call sequences
+  // Handle other opcodes we reasonably expect to see in call
+  // sequences. Note this may include spill/restore of FP/BP.
   switch (MI.getOpcode()) {
   default:
+    assert(!(MI.modifiesRegister(X86::RSP, &RI) ||
+             MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) &&
+           "Unhandled opcode in getSPAdjust");
     return 0;
   case X86::PUSH32r:
   case X86::PUSH32rmm:
@@ -467,6 +470,30 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
   case X86::PUSH64rmr:
   case X86::PUSH64i32:
     return 8;
+  case X86::POP32r:
+  case X86::POP32rmm:
+  case X86::POP32rmr:
+    return -4;
+  case X86::POP64r:
+  case X86::POP64rmm:
+  case X86::POP64rmr:
+    return -8;
+  // FIXME: (implement and) use isAddImmediate in the
+  // default case instead of the following ADD/SUB cases.
+  case X86::ADD32ri:
+  case X86::ADD32ri8:
+  case X86::ADD64ri32:
+    if (MI.getOperand(0).getReg() == X86::RSP &&
+        MI.getOperand(1).getReg() == X86::RSP)
+      return -MI.getOperand(2).getImm();
+    return 0;
+  case X86::SUB32ri:
+  case X86::SUB32ri8:
+  case X86::SUB64ri32:
+    if (MI.getOperand(0).getReg() == X86::RSP &&
+        MI.getOperand(1).getReg() == X86::RSP)
+      return MI.getOperand(2).getImm();
+    return 0;
   }
 }
 

>From 32e26ed862911ac959b608619bbada88c0ab62f5 Mon Sep 17 00:00:00 2001
From: "Zabawa, Daniel" <daniel.zabawa at intel.com>
Date: Thu, 31 Oct 2024 12:06:46 -0700
Subject: [PATCH 2/2] Add test and new MFI serializations to support test

---
 .../lib/Target/X86/X86MachineFunctionInfo.cpp |  6 +-
 llvm/lib/Target/X86/X86MachineFunctionInfo.h  |  4 +
 llvm/test/CodeGen/X86/pr114265.mir            | 94 +++++++++++++++++++
 3 files changed, 103 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr114265.mir

diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
index 7b57f7c23bf4da..aec8f3ee7484f6 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -15,7 +15,9 @@ using namespace llvm;
 
 yaml::X86MachineFunctionInfo::X86MachineFunctionInfo(
     const llvm::X86MachineFunctionInfo &MFI)
-    : AMXProgModel(MFI.getAMXProgModel()) {}
+    : AMXProgModel(MFI.getAMXProgModel()),
+      FPClobberedByCall(MFI.getFPClobberedByCall()),
+      HasPushSequences(MFI.getHasPushSequences()) {}
 
 void yaml::X86MachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
   MappingTraits<X86MachineFunctionInfo>::mapping(YamlIO, *this);
@@ -31,6 +33,8 @@ MachineFunctionInfo *X86MachineFunctionInfo::clone(
 void X86MachineFunctionInfo::initializeBaseYamlFields(
     const yaml::X86MachineFunctionInfo &YamlMFI) {
   AMXProgModel = YamlMFI.AMXProgModel;
+  FPClobberedByCall = YamlMFI.FPClobberedByCall;
+  HasPushSequences = YamlMFI.HasPushSequences;
 }
 
 void X86MachineFunctionInfo::anchor() { }
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 24371369d4a452..6414e6f22500cc 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -38,6 +38,8 @@ template <> struct ScalarEnumerationTraits<AMXProgModelEnum> {
 
 struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
   AMXProgModelEnum AMXProgModel;
+  bool FPClobberedByCall;
+  bool HasPushSequences;
 
   X86MachineFunctionInfo() = default;
   X86MachineFunctionInfo(const llvm::X86MachineFunctionInfo &MFI);
@@ -49,6 +51,8 @@ struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
 template <> struct MappingTraits<X86MachineFunctionInfo> {
   static void mapping(IO &YamlIO, X86MachineFunctionInfo &MFI) {
     YamlIO.mapOptional("amxProgModel", MFI.AMXProgModel);
+    YamlIO.mapOptional("FPClobberedByCall", MFI.FPClobberedByCall, false);
+    YamlIO.mapOptional("hasPushSequences", MFI.HasPushSequences, false);
   }
 };
 } // end namespace yaml
diff --git a/llvm/test/CodeGen/X86/pr114265.mir b/llvm/test/CodeGen/X86/pr114265.mir
new file mode 100644
index 00000000000000..b6e724b4bd128a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr114265.mir
@@ -0,0 +1,94 @@
+# The change being tested here is that X86InstrInfo's getSPAdjust correctly handles POP/ADD instructions within
+# call sequences, as previously it assumed only PUSHes would be present for parameter passing.
+# What this test actually does is recreate a situation where:
+#  - something other than a PUSH appears in a call sequence, and
+#  - failing to recognize the SP adjustment by such an instruction actually changes something
+#    observable.
+#
+# To this end, we create a situation where:
+#  - the FP must be spilled around calls
+#  - a frame object is stored before a call frame and loaded in the call frame 
+#    (emulating an argument restored from spill), following a call which POPs something
+#  - call-frame pseudos can *not* be simplified early in prologepilog
+#
+# The issue being corrected is the case where prologepilog sees the SP adjustment of PUSHes only, and not
+# POP/ADD. This adjustment value can be carried over and incorrectly applied to frame offsets. So,
+# in the following we ensure that references to a frame object carry the same offset.
+#
+# NB:
+#  FPClobberedByCall and hasPushSequences have to be supplied in the MFI section. The former
+#  is required to force spill of the FP, and the latter ensures call-frame pseudos are not simplified.
+#
+#  The csr_64_intel_ocl_bi_avx512 regmask is used to ensure that the FP is spilled. Other csr's may
+#  achieve the same.
+#
+# RUN: llc -mtriple x86_64-unknown-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s 
+---
+name:            f
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   true
+failsVerification: false
+tracksDebugUserValues: true
+registers:       []
+liveins:
+  - { reg: '$rdi', virtual-reg: '' }
+  - { reg: '$rsi', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    64
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, 
+      alignment: 32, stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  FPClobberedByCall:  true
+  hasPushSequences: true
+body:             |
+  bb.0:
+    liveins: $rdi, $rsi
+    MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rdi :: (store (s64) into %stack.0)
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
+    $rdi = COPY renamable $rax
+    CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+...
+# ensure the store and load to the frame object have matching offsets after resolution.
+# CHECK: MOV64mr $rsp, 1, $noreg, [[DISP:[1-9][0-9]+]]
+# CHECK: MOV64rm $rsp, 1, $noreg, [[DISP]]



More information about the llvm-commits mailing list