[llvm] 6fb7cdf - [X86] Recognize POP/ADD/SUB modifying rsp in getSPAdjust. (#114265)

Thu Nov 14 08:20:20 PST 2024

Author: Daniel Zabawa
Date: 2024-11-14T17:20:16+01:00
New Revision: 6fb7cdff3d90c565b87a253ff7dbd36319879111

URL: https://github.com/llvm/llvm-project/commit/6fb7cdff3d90c565b87a253ff7dbd36319879111
DIFF: https://github.com/llvm/llvm-project/commit/6fb7cdff3d90c565b87a253ff7dbd36319879111.diff

LOG: [X86] Recognize POP/ADD/SUB modifying rsp in getSPAdjust. (#114265)

This code assumed only PUSHes would appear in call sequences. However,
if calls require frame-pointer/base-pointer spills, only the PUSH
operations inserted by spillFPBP will be recognized, and the adjustments
to frame object offsets in prologepilog will be incorrect.

This change correctly reports the SP adjustment for POP and ADD/SUB to
rsp, and adds an assertion for unrecognized instructions that modify rsp.

Added: 
    llvm/test/CodeGen/X86/pr114265.mir

Modified: 
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
    llvm/lib/Target/X86/X86MachineFunctionInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 1b95450596314b..3af3aa838159d1 100644

--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -451,10 +451,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
     return -(I->getOperand(1).getImm());
   }
 
-  // Currently handle only PUSHes we can reasonably expect to see
-  // in call sequences
+  // Handle other opcodes we reasonably expect to see in call
+  // sequences. Note this may include spill/restore of FP/BP.
   switch (MI.getOpcode()) {
   default:
+    assert(!(MI.modifiesRegister(X86::RSP, &RI) ||
+             MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) &&
+           "Unhandled opcode in getSPAdjust");
     return 0;
   case X86::PUSH32r:
   case X86::PUSH32rmm:
@@ -466,6 +469,30 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
   case X86::PUSH64rmr:
   case X86::PUSH64i32:
     return 8;
+  case X86::POP32r:
+  case X86::POP32rmm:
+  case X86::POP32rmr:
+    return -4;
+  case X86::POP64r:
+  case X86::POP64rmm:
+  case X86::POP64rmr:
+    return -8;
+  // FIXME: (implement and) use isAddImmediate in the
+  // default case instead of the following ADD/SUB cases.
+  case X86::ADD32ri:
+  case X86::ADD32ri8:
+  case X86::ADD64ri32:
+    if (MI.getOperand(0).getReg() == X86::RSP &&
+        MI.getOperand(1).getReg() == X86::RSP)
+      return -MI.getOperand(2).getImm();
+    return 0;
+  case X86::SUB32ri:
+  case X86::SUB32ri8:
+  case X86::SUB64ri32:
+    if (MI.getOperand(0).getReg() == X86::RSP &&
+        MI.getOperand(1).getReg() == X86::RSP)
+      return MI.getOperand(2).getImm();
+    return 0;
   }
 }
 

diff  --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
index 7b57f7c23bf4da..aec8f3ee7484f6 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -15,7 +15,9 @@ using namespace llvm;
 
 yaml::X86MachineFunctionInfo::X86MachineFunctionInfo(
     const llvm::X86MachineFunctionInfo &MFI)
-    : AMXProgModel(MFI.getAMXProgModel()) {}
+    : AMXProgModel(MFI.getAMXProgModel()),
+      FPClobberedByCall(MFI.getFPClobberedByCall()),
+      HasPushSequences(MFI.getHasPushSequences()) {}
 
 void yaml::X86MachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
   MappingTraits<X86MachineFunctionInfo>::mapping(YamlIO, *this);
@@ -31,6 +33,8 @@ MachineFunctionInfo *X86MachineFunctionInfo::clone(
 void X86MachineFunctionInfo::initializeBaseYamlFields(
     const yaml::X86MachineFunctionInfo &YamlMFI) {
   AMXProgModel = YamlMFI.AMXProgModel;
+  FPClobberedByCall = YamlMFI.FPClobberedByCall;
+  HasPushSequences = YamlMFI.HasPushSequences;
 }
 
 void X86MachineFunctionInfo::anchor() { }

diff  --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 24371369d4a452..6414e6f22500cc 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -38,6 +38,8 @@ template <> struct ScalarEnumerationTraits<AMXProgModelEnum> {
 
 struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
   AMXProgModelEnum AMXProgModel;
+  bool FPClobberedByCall;
+  bool HasPushSequences;
 
   X86MachineFunctionInfo() = default;
   X86MachineFunctionInfo(const llvm::X86MachineFunctionInfo &MFI);
@@ -49,6 +51,8 @@ struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo {
 template <> struct MappingTraits<X86MachineFunctionInfo> {
   static void mapping(IO &YamlIO, X86MachineFunctionInfo &MFI) {
     YamlIO.mapOptional("amxProgModel", MFI.AMXProgModel);
+    YamlIO.mapOptional("FPClobberedByCall", MFI.FPClobberedByCall, false);
+    YamlIO.mapOptional("hasPushSequences", MFI.HasPushSequences, false);
   }
 };
 } // end namespace yaml

diff  --git a/llvm/test/CodeGen/X86/pr114265.mir b/llvm/test/CodeGen/X86/pr114265.mir
new file mode 100644
index 00000000000000..b6e724b4bd128a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr114265.mir
@@ -0,0 +1,94 @@
+# The change being tested here is that X86InstrInfo's getSPAdjust correctly handles POP/ADD instructions within
+# call sequences, as previously it assumed only PUSHes would be present for parameter passing.
+# What this test actually does is recreate a situation where:
+#  - something other than a PUSH appears in a call sequence, and
+#  - failing to recognize the SP adjustment by such an instruction actually changes something
+#    observable.
+#
+# To this end, we create a situation where:
+#  - the FP must be spilled around calls
+#  - a frame object is stored before a call frame and loaded in the call frame 
+#    (emulating an argument restored from spill), following a call which POPs something
+#  - call-frame pseudos can *not* be simplified early in prologepilog
+#
+# The issue being corrected is the case where prologepilog sees the SP adjustment of PUSHes only, and not
+# POP/ADD. This adjustment value can be carried over and incorrectly applied to frame offsets. So,
+# in the following we ensure that references to a frame object carry the same offset.
+#
+# NB:
+#  FPClobberedByCall and hasPushSequences have to be supplied in the MFI section. The former
+#  is required to force spill of the FP, and the latter ensures call-frame pseudos are not simplified.
+#
+#  The csr_64_intel_ocl_bi_avx512 regmask is used to ensure that the FP is spilled. Other csr's may
+#  achieve the same.
+#
+# RUN: llc -mtriple x86_64-unknown-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s 
+---
+name:            f
+alignment:       16
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   true
+failsVerification: false
+tracksDebugUserValues: true
+registers:       []
+liveins:
+  - { reg: '$rdi', virtual-reg: '' }
+  - { reg: '$rsi', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    64
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, 
+      alignment: 32, stack-id: default, callee-saved-register: '', callee-saved-restored: true, 
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+  FPClobberedByCall:  true
+  hasPushSequences: true
+body:             |
+  bb.0:
+    liveins: $rdi, $rsi
+    MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rdi :: (store (s64) into %stack.0)
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0)
+    $rdi = COPY renamable $rax
+    CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+...
+# ensure the store and load to the frame object have matching offsets after resolution.
+# CHECK: MOV64mr $rsp, 1, $noreg, [[DISP:[1-9][0-9]+]]
+# CHECK: MOV64rm $rsp, 1, $noreg, [[DISP]]