[llvm] [X86][IPRA] Add getIPRACSRegs since frame registers risk being optimized out. (PR #109597)

Freddy Ye via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 18:36:29 PDT 2024


https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/109597

>From 8ada603aee6d62093ce0e1d431d8ddf654150b3c Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 3 Sep 2024 17:07:19 +0800
Subject: [PATCH 1/8] [X86][IPRA] Add getIPRACSRegs since rbp risks being
 optimized out.

This patch is a workaround to fix the correctness of IPRA on X86.
---
 .../include/llvm/CodeGen/TargetRegisterInfo.h |  4 ++
 llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp  |  6 ++-
 llvm/lib/Target/X86/X86CallingConv.td         |  3 ++
 llvm/lib/Target/X86/X86RegisterInfo.cpp       |  5 +++
 llvm/lib/Target/X86/X86RegisterInfo.h         |  3 ++
 llvm/test/CodeGen/X86/ipra-local-linkage-2.ll | 37 +++++++++++++++++++
 6 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/ipra-local-linkage-2.ll

diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 1a2f31e199336a..0f6484fddfe61f 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -489,6 +489,10 @@ class TargetRegisterInfo : public MCRegisterInfo {
   virtual const MCPhysReg*
   getCalleeSavedRegs(const MachineFunction *MF) const = 0;
 
+  /// Return a null-terminated list of all of the callee-saved registers on
+  /// this target when IPRA is on. Normally, this list should be null.
+  virtual const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const = 0;
+
   /// Return a mask of call-preserved registers for the given calling convention
   /// on the current function. The mask should include all call-preserved
   /// aliases. This is used by the register allocator to determine which
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 7d054cb7c7c71f..364cc933731dec 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -107,8 +107,12 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // are preferred over callee saved registers.
   if (MF.getTarget().Options.EnableIPRA &&
       isSafeForNoCSROpt(MF.getFunction()) &&
-      isProfitableForNoCSROpt(MF.getFunction()))
+      isProfitableForNoCSROpt(MF.getFunction())) {
+    const MCPhysReg *IPRACSRegs = TRI.getIPRACSRegs(&MF);
+    for (unsigned i = 0; IPRACSRegs[i]; ++i)
+      SavedRegs.set(IPRACSRegs[i]);
     return;
+  }
 
   // Get the callee saved register list...
   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 307aeb2ea4c6fd..472823a6d036ba 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -1104,6 +1104,9 @@ def CC_X86 : CallingConv<[
 
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
 
+def CSR_IPRA_32 : CalleeSavedRegs<(add EBP)>;
+def CSR_IPRA_64 : CalleeSavedRegs<(add RBP)>;
+
 def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
 def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;
 
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 1d8808f4e2b7d0..302d50581e1e6b 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -410,6 +410,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   return CallsEHReturn ? CSR_32EHRet_SaveList : CSR_32_SaveList;
 }
 
+const MCPhysReg *
+X86RegisterInfo::getIPRACSRegs(const MachineFunction *MF) const {
+  return Is64Bit ? CSR_IPRA_64_SaveList : CSR_IPRA_32_SaveList;
+}
+
 const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
     const MachineFunction *MF) const {
   assert(MF && "Invalid MachineFunction pointer.");
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 2f73698a4b94d3..68ee372f27b14d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -99,6 +99,9 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
   /// callee-save registers on this target.
   const MCPhysReg *
   getCalleeSavedRegs(const MachineFunction* MF) const override;
+  /// getIPRACSRegs - This API can be removed when rbp is safe to optimized out
+  /// when IPRA is on.
+  const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const override;
   const MCPhysReg *
   getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
diff --git a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
new file mode 100644
index 00000000000000..f337dfc989d81a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -enable-ipra < %s | FileCheck %s
+
+; This test is to ensure rbp is correctly saved/restored before/after the
+; inline asm call in foo()
+
+target triple = "x86_64--"
+
+define internal void @foo() norecurse nounwind {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    xorl %ebp, %ebp
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+  call void asm sideeffect "xor %ebp, %ebp", "~{ebp}"()
+  ret void
+}
+
+define void @bar(i32 %X) "frame-pointer"="all" nounwind {
+; CHECK-LABEL: bar:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    callq foo
+; CHECK-NEXT:    movl $5, -4(%rbp)
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    retq
+  call void @foo()
+  %addr = alloca i32, align 4
+  store i32 5, ptr %addr, align 4
+  ret void
+}

>From 12f3319d2d4d7740395f6066721994b458e5c8e2 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 23 Sep 2024 11:19:34 +0800
Subject: [PATCH 2/8] fix CI

---
 llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 4 +++-
 llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp   | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 0f6484fddfe61f..0ca435eb945db2 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -491,7 +491,9 @@ class TargetRegisterInfo : public MCRegisterInfo {
 
   /// Return a null-terminated list of all of the callee-saved registers on
   /// this target when IPRA is on. Normally, this list should be null.
-  virtual const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const = 0;
+  virtual const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const {
+    return nullptr;
+  }
 
   /// Return a mask of call-preserved registers for the given calling convention
   /// on the current function. The mask should include all call-preserved
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 364cc933731dec..4885811399a034 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -109,7 +109,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
       isSafeForNoCSROpt(MF.getFunction()) &&
       isProfitableForNoCSROpt(MF.getFunction())) {
     const MCPhysReg *IPRACSRegs = TRI.getIPRACSRegs(&MF);
-    for (unsigned i = 0; IPRACSRegs[i]; ++i)
+    for (unsigned i = 0; IPRACSRegs && IPRACSRegs[i]; ++i)
       SavedRegs.set(IPRACSRegs[i]);
     return;
   }

>From e00bcd9051a4a627838ab6b90d41798310ec9927 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 24 Sep 2024 11:21:46 +0800
Subject: [PATCH 3/8] Add more X86 potential frame registers in CSR_IPRA_32/64

---
 llvm/lib/Target/X86/X86CallingConv.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 472823a6d036ba..91af111db8cda5 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -1104,8 +1104,8 @@ def CC_X86 : CallingConv<[
 
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
 
-def CSR_IPRA_32 : CalleeSavedRegs<(add EBP)>;
-def CSR_IPRA_64 : CalleeSavedRegs<(add RBP)>;
+def CSR_IPRA_32 : CalleeSavedRegs<(add EBP, ESI)>;
+def CSR_IPRA_64 : CalleeSavedRegs<(add RBP, RBX)>;
 
 def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
 def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;

>From d7e1ea3d77716f60abbd15a4c5b112d986eed080 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 24 Sep 2024 19:55:36 +0800
Subject: [PATCH 4/8] fix CI: at-risk CSRegs also require an
 isPhysRegModified check

---
 llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp | 21 ++++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 4885811399a034..42571511ba6cfb 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -103,19 +103,18 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // saved registers.
   SavedRegs.resize(TRI.getNumRegs());
 
-  // When interprocedural register allocation is enabled caller saved registers
-  // are preferred over callee saved registers.
+  // Get the callee saved register list...
+  const MCPhysReg *CSRegs = nullptr;
+
+  // When interprocedural register allocation is enabled, callee saved register
+  // lit should be null, since caller saved registers are preferred over callee
+  // saved registers. Unless it has some risked CSR to be optimized out.
   if (MF.getTarget().Options.EnableIPRA &&
       isSafeForNoCSROpt(MF.getFunction()) &&
-      isProfitableForNoCSROpt(MF.getFunction())) {
-    const MCPhysReg *IPRACSRegs = TRI.getIPRACSRegs(&MF);
-    for (unsigned i = 0; IPRACSRegs && IPRACSRegs[i]; ++i)
-      SavedRegs.set(IPRACSRegs[i]);
-    return;
-  }
-
-  // Get the callee saved register list...
-  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+      isProfitableForNoCSROpt(MF.getFunction()))
+    CSRegs = TRI.getIPRACSRegs(&MF);
+  else
+    CSRegs = MF.getRegInfo().getCalleeSavedRegs();
 
   // Early exit if there are no callee saved registers.
   if (!CSRegs || CSRegs[0] == 0)

>From de92c7f060f9615cffc9368cc11f2990a1728a31 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 25 Sep 2024 13:10:48 +0800
Subject: [PATCH 5/8] address comments

---
 llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp  |   4 +-
 llvm/test/CodeGen/X86/ipra-local-linkage-2.ll | 173 +++++++++++++++---
 2 files changed, 149 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 42571511ba6cfb..4ee86e07e1a5a3 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -107,8 +107,8 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
   const MCPhysReg *CSRegs = nullptr;
 
   // When interprocedural register allocation is enabled, callee saved register
-  // lit should be null, since caller saved registers are preferred over callee
-  // saved registers. Unless it has some risked CSR to be optimized out.
+  // list should be empty, since caller saved registers are preferred over
+  // callee saved registers. Unless it has some risked CSR to be optimized out.
   if (MF.getTarget().Options.EnableIPRA &&
       isSafeForNoCSROpt(MF.getFunction()) &&
       isProfitableForNoCSROpt(MF.getFunction()))
diff --git a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
index f337dfc989d81a..d8b0a749debb55 100644
--- a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
+++ b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
@@ -1,37 +1,158 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -enable-ipra < %s | FileCheck %s
+; RUN: llc --mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 %s
+; RUN: llc --mtriple=i386-- < %s | FileCheck --check-prefix=X86 %s
 
-; This test is to ensure rbp is correctly saved/restored before/after the
-; inline asm call in foo()
+; This test is to ensure rbp/rbx/ebp/esi is correctly saved/restored before clobbered when enable ipra.
 
-target triple = "x86_64--"
-
-define internal void @foo() norecurse nounwind {
-; CHECK-LABEL: foo:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    xorl %ebp, %ebp
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
+define internal void @callee_clobber_rbp() norecurse nounwind {
+; X64-LABEL: callee_clobber_rbp:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    #APP
+; X64-NEXT:    xorl %ebp, %ebp
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+;
+; X86-LABEL: callee_clobber_rbp:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    #APP
+; X86-NEXT:    xorl %ebp, %ebp
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
   call void asm sideeffect "xor %ebp, %ebp", "~{ebp}"()
   ret void
 }
 
-define void @bar(i32 %X) "frame-pointer"="all" nounwind {
-; CHECK-LABEL: bar:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rbp
-; CHECK-NEXT:    movq %rsp, %rbp
-; CHECK-NEXT:    subq $16, %rsp
-; CHECK-NEXT:    callq foo
-; CHECK-NEXT:    movl $5, -4(%rbp)
-; CHECK-NEXT:    addq $16, %rsp
-; CHECK-NEXT:    popq %rbp
-; CHECK-NEXT:    retq
-  call void @foo()
+define internal void @callee_clobber_rbx() norecurse nounwind {
+; X64-LABEL: callee_clobber_rbx:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    #APP
+; X64-NEXT:    xorl %ebx, %ebx
+; X64-NEXT:    #NO_APP
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  call void asm sideeffect "xor %ebx, %ebx", "~{ebx}"()
+  ret void
+}
+
+define internal void @callee_clobber_esi() norecurse nounwind {
+; X86-LABEL: callee_clobber_esi:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    #APP
+; X86-NEXT:    xorl %esi, %esi
+; X86-NEXT:    #NO_APP
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+  call void asm sideeffect "xor %esi, %esi", "~{esi}"()
+  ret void
+}
+
+define void @caller_use_rbp() "frame-pointer"="all" nounwind {
+; X64-LABEL: caller_use_rbp:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    subq $16, %rsp
+; X64-NEXT:    callq callee_clobber_rbp
+; X64-NEXT:    movl $5, -4(%rbp)
+; X64-NEXT:    addq $16, %rsp
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+;
+; X86-LABEL: caller_use_rbp:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll callee_clobber_rbp
+; X86-NEXT:    movl $5, -4(%ebp)
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+  call void @callee_clobber_rbp()
   %addr = alloca i32, align 4
   store i32 5, ptr %addr, align 4
   ret void
 }
+
+define void @caller_use_rbx(i32 %X) nounwind ssp {
+; X64-LABEL: caller_use_rbx:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    movq %rsp, %rbp
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    andq $-32, %rsp
+; X64-NEXT:    subq $64, %rsp
+; X64-NEXT:    movq %rsp, %rbx
+; X64-NEXT:    movq __stack_chk_guard(%rip), %rax
+; X64-NEXT:    movq %rax, 32(%rbx)
+; X64-NEXT:    movq %rsp, %rax
+; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    leaq 15(,%rcx,4), %rcx
+; X64-NEXT:    andq $-16, %rcx
+; X64-NEXT:    subq %rcx, %rax
+; X64-NEXT:    movq %rax, %rsp
+; X64-NEXT:    movq %rbx, %rdi
+; X64-NEXT:    callq callee_clobber_rbx
+; X64-NEXT:    movq __stack_chk_guard(%rip), %rax
+; X64-NEXT:    cmpq 32(%rbx), %rax
+; X64-NEXT:    jne .LBB4_2
+; X64-NEXT:  # %bb.1:
+; X64-NEXT:    leaq -8(%rbp), %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+; X64-NEXT:  .LBB4_2:
+; X64-NEXT:    callq __stack_chk_fail at PLT
+  %realign = alloca i32, align 32
+  %addr = alloca i32, i32 %X
+  call void @callee_clobber_rbx(ptr %realign)
+  ret void
+}
+
+define void @caller_use_esi(i32 %X) ssp {
+; X86-LABEL: caller_use_esi:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %ebp, -8
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    .cfi_def_cfa_register %ebp
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-32, %esp
+; X86-NEXT:    subl $32, %esp
+; X86-NEXT:    movl %esp, %esi
+; X86-NEXT:    .cfi_offset %esi, -12
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl __stack_chk_guard, %ecx
+; X86-NEXT:    movl %ecx, 16(%esi)
+; X86-NEXT:    movl %esp, %ecx
+; X86-NEXT:    shll $2, %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    movl %ecx, %esp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll callee_clobber_esi
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    movl __stack_chk_guard, %eax
+; X86-NEXT:    cmpl 16(%esi), %eax
+; X86-NEXT:    jne .LBB5_2
+; X86-NEXT:  # %bb.1:
+; X86-NEXT:    leal -4(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    .cfi_def_cfa %esp, 4
+; X86-NEXT:    retl
+; X86-NEXT:  .LBB5_2:
+; X86-NEXT:    .cfi_def_cfa %ebp, 8
+; X86-NEXT:    calll __stack_chk_fail
+  %realign = alloca i32, align 32
+  %addr = alloca i32, i32 %X
+  call void @callee_clobber_esi(ptr %realign)
+  ret void
+}

>From 7beeba8e26d8c7bdbcad95331f2c83c957d98e3d Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 25 Sep 2024 13:12:43 +0800
Subject: [PATCH 6/8] update test with nounwind

---
 llvm/test/CodeGen/X86/ipra-local-linkage-2.ll | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
index d8b0a749debb55..684331aadbde01 100644
--- a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
+++ b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
@@ -115,19 +115,15 @@ define void @caller_use_rbx(i32 %X) nounwind ssp {
   ret void
 }
 
-define void @caller_use_esi(i32 %X) ssp {
+define void @caller_use_esi(i32 %X) nounwind ssp {
 ; X86-LABEL: caller_use_esi:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
-; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebp, -8
 ; X86-NEXT:    movl %esp, %ebp
-; X86-NEXT:    .cfi_def_cfa_register %ebp
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-32, %esp
 ; X86-NEXT:    subl $32, %esp
 ; X86-NEXT:    movl %esp, %esi
-; X86-NEXT:    .cfi_offset %esi, -12
 ; X86-NEXT:    movl 8(%ebp), %eax
 ; X86-NEXT:    movl __stack_chk_guard, %ecx
 ; X86-NEXT:    movl %ecx, 16(%esi)
@@ -146,10 +142,8 @@ define void @caller_use_esi(i32 %X) ssp {
 ; X86-NEXT:    leal -4(%ebp), %esp
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %ebp
-; X86-NEXT:    .cfi_def_cfa %esp, 4
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB5_2:
-; X86-NEXT:    .cfi_def_cfa %ebp, 8
 ; X86-NEXT:    calll __stack_chk_fail
   %realign = alloca i32, align 32
   %addr = alloca i32, i32 %X

>From 98ec015e0f6cfa68cf15ed431ae1722cb0d31d62 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 25 Sep 2024 13:40:21 +0800
Subject: [PATCH 7/8] update test

---
 llvm/test/CodeGen/X86/ipra-local-linkage-2.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
index 684331aadbde01..05d3f70820fb0c 100644
--- a/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
+++ b/llvm/test/CodeGen/X86/ipra-local-linkage-2.ll
@@ -4,7 +4,7 @@
 
 ; This test is to ensure rbp/rbx/ebp/esi is correctly saved/restored before clobbered when enable ipra.
 
-define internal void @callee_clobber_rbp() norecurse nounwind {
+define internal void @callee_clobber_rbp() nounwind norecurse {
 ; X64-LABEL: callee_clobber_rbp:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pushq %rbp
@@ -26,7 +26,7 @@ define internal void @callee_clobber_rbp() norecurse nounwind {
   ret void
 }
 
-define internal void @callee_clobber_rbx() norecurse nounwind {
+define internal void @callee_clobber_rbx(ptr %addr) nounwind norecurse {
 ; X64-LABEL: callee_clobber_rbx:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pushq %rbx
@@ -39,7 +39,7 @@ define internal void @callee_clobber_rbx() norecurse nounwind {
   ret void
 }
 
-define internal void @callee_clobber_esi() norecurse nounwind {
+define internal void @callee_clobber_esi(ptr %addr) nounwind norecurse {
 ; X86-LABEL: callee_clobber_esi:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi

>From ef0f2682c9d8866a2a1fb552cb5941dbf416be56 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 26 Sep 2024 09:36:06 +0800
Subject: [PATCH 8/8] address comments

---
 llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 0ca435eb945db2..9ea0fba1144b13 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -490,7 +490,11 @@ class TargetRegisterInfo : public MCRegisterInfo {
   getCalleeSavedRegs(const MachineFunction *MF) const = 0;
 
   /// Return a null-terminated list of all of the callee-saved registers on
-  /// this target when IPRA is on. Normally, this list should be null.
+  /// this target when IPRA is on. The list should include any non-allocatable
+  /// registers that the backend uses and assumes will be saved by all calling
+  /// conventions. This is typically the ISA-standard frame pointer, but could
+  /// include the thread pointer, TOC pointer, or base pointer for different
+  /// targets.
   virtual const MCPhysReg *getIPRACSRegs(const MachineFunction *MF) const {
     return nullptr;
   }



More information about the llvm-commits mailing list