[llvm] 46e764a - [x86] introduce no_callee_saved_registers attribute

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 1 16:19:22 PST 2021


Author: Philip Reames
Date: 2021-02-01T16:19:14-08:00
New Revision: 46e764a628da81795af3f64bd28970b7bd4115d6

URL: https://github.com/llvm/llvm-project/commit/46e764a628da81795af3f64bd28970b7bd4115d6
DIFF: https://github.com/llvm/llvm-project/commit/46e764a628da81795af3f64bd28970b7bd4115d6.diff

LOG: [x86] introduce no_callee_saved_registers attribute

This is directly analogous to the existing no_caller_saved_registers, but with the opposite intention. A function or call so marked shifts the responsibility of spilling the usual CSRs to its caller.

An indirect call whose call site and callee don't agree on the attribute is ill-defined.

The motivation for this change is that being able to prune callee saves (without modifying other details of the calling convention) is sometimes useful when generating stubs and adapters.  There's no intention to expose this as a source language feature; this is expected to be used by frontends to implement adapters where warranted.

Some specific examples of use cases:
* GC compatible compiled code wants to call an externally defined library function without needing to track pointer values through CSRs.
* debug-enabled code wants to call a precompiled library which doesn't provide enough information to track CSRs, while preserving debug quality in the caller.
* an adapter stub entering hand-written assembly which doesn't follow normal calling conventions.

Added: 
    llvm/test/CodeGen/X86/x86-no_callee_saved_registers.ll

Modified: 
    llvm/lib/Target/X86/X86FastISel.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86RegisterInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 2496a814e0a5..3d8f77ebe503 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3215,6 +3215,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if ((CB && isa<CallInst>(CB) && CB->hasFnAttr("no_caller_saved_registers")))
     return false;
 
+  // Functions with no_callee_saved_registers that need special handling.
+  if ((CB && CB->hasFnAttr("no_callee_saved_registers")))
+    return false;
+
   // Functions using thunks for indirect calls need to use SDISel.
   if (Subtarget->useIndirectThunkCalls())
     return false;

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6c844e7bcfcf..53ef3a81177e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4335,11 +4335,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                   RegsToPass[i].second.getValueType()));
 
   // Add a register mask operand representing the call-preserved registers.
-  // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
-  // set X86_INTR calling convention because it has the same CSR mask
-  // (same preserved registers).
-  const uint32_t *Mask = RegInfo->getCallPreservedMask(
-      MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
+  const uint32_t *Mask = [&]() {
+    auto AdaptedCC = CallConv;
+    // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
+    // use X86_INTR calling convention because it has the same CSR mask
+    // (same preserved registers).
+    if (HasNCSR)
+      AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
+    // If NoCalleeSavedRegisters is requested, than use GHC since it happens
+    // to use the CSR_NoRegs_RegMask.
+    if (CB && CB->hasFnAttr("no_callee_saved_registers"))
+      AdaptedCC = (CallingConv::ID)CallingConv::GHC;
+    return RegInfo->getCallPreservedMask(MF, AdaptedCC);
+  }();
   assert(Mask && "Missing call preserved mask for calling convention");
 
   // If this is an invoke in a 32-bit function using a funclet-based

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index d90b4e7bdc7e..a20e8153f314 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -290,6 +290,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   if (MF->getFunction().hasFnAttribute("no_caller_saved_registers"))
     CC = CallingConv::X86_INTR;
 
+  // If atribute specified, override the CSRs normally specified by the
+  // calling convention and use the empty set instead.
+  if (MF->getFunction().hasFnAttribute("no_callee_saved_registers"))
+    return CSR_NoRegs_SaveList;
+
   switch (CC) {
   case CallingConv::GHC:
   case CallingConv::HiPE:

diff  --git a/llvm/test/CodeGen/X86/x86-no_callee_saved_registers.ll b/llvm/test/CodeGen/X86/x86-no_callee_saved_registers.ll
new file mode 100644
index 000000000000..620a7ce49614
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-no_callee_saved_registers.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -O0 < %s | FileCheck --check-prefixes=CHECK,CHECK-O0 %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -O3 < %s | FileCheck --check-prefixes=CHECK,CHECK-O3 %s
+
+declare void @external()
+declare void @no_csr() "no_callee_saved_registers"
+
+define void @normal() {
+; CHECK-LABEL: normal:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq external@PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  call void @external()
+  ret void
+}
+
+; Calling a routine with no CSRs means the caller has to spill a bunch
+define void @test1() {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    callq external@PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  call void @external() "no_callee_saved_registers"
+  ret void
+}
+
+; Same as test1, but on callee, not callsite
+define void @test2() {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset %rbx, -56
+; CHECK-NEXT:    .cfi_offset %r12, -48
+; CHECK-NEXT:    .cfi_offset %r13, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    callq no_csr@PLT
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  call void @no_csr()
+  ret void
+}
+
+; on an invoke instead
+define i32 @test3() personality i8* undef {
+; CHECK-O0-LABEL: test3:
+; CHECK-O0:       # %bb.0: # %entry
+; CHECK-O0-NEXT:    pushq %rbp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    pushq %r15
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-O0-NEXT:    pushq %r14
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-NEXT:    pushq %r13
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-O0-NEXT:    pushq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-NEXT:    pushq %rbx
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O0-NEXT:    .cfi_offset %rbx, -56
+; CHECK-O0-NEXT:    .cfi_offset %r12, -48
+; CHECK-O0-NEXT:    .cfi_offset %r13, -40
+; CHECK-O0-NEXT:    .cfi_offset %r14, -32
+; CHECK-O0-NEXT:    .cfi_offset %r15, -24
+; CHECK-O0-NEXT:    .cfi_offset %rbp, -16
+; CHECK-O0-NEXT:  .Ltmp0:
+; CHECK-O0-NEXT:    callq no_csr@PLT
+; CHECK-O0-NEXT:  .Ltmp1:
+; CHECK-O0-NEXT:    jmp .LBB3_1
+; CHECK-O0-NEXT:  .LBB3_1: # %invoke.cont
+; CHECK-O0-NEXT:    movl $1, %eax
+; CHECK-O0-NEXT:    addq $8, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-O0-NEXT:    popq %rbx
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-O0-NEXT:    popq %r13
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-NEXT:    popq %r14
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-O0-NEXT:    popq %r15
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    popq %rbp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT:    retq
+; CHECK-O0-NEXT:  .LBB3_2: # %lpad
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O0-NEXT:  .Ltmp2:
+; CHECK-O0-NEXT:    xorl %eax, %eax
+; CHECK-O0-NEXT:    addq $8, %rsp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-O0-NEXT:    popq %rbx
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O0-NEXT:    popq %r12
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-O0-NEXT:    popq %r13
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O0-NEXT:    popq %r14
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-O0-NEXT:    popq %r15
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O0-NEXT:    popq %rbp
+; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: test3:
+; CHECK-O3:       # %bb.0: # %entry
+; CHECK-O3-NEXT:    pushq %rbp
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O3-NEXT:    pushq %r15
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-O3-NEXT:    pushq %r14
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O3-NEXT:    pushq %r13
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-O3-NEXT:    pushq %r12
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O3-NEXT:    pushq %rbx
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-O3-NEXT:    pushq %rax
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O3-NEXT:    .cfi_offset %rbx, -56
+; CHECK-O3-NEXT:    .cfi_offset %r12, -48
+; CHECK-O3-NEXT:    .cfi_offset %r13, -40
+; CHECK-O3-NEXT:    .cfi_offset %r14, -32
+; CHECK-O3-NEXT:    .cfi_offset %r15, -24
+; CHECK-O3-NEXT:    .cfi_offset %rbp, -16
+; CHECK-O3-NEXT:  .Ltmp0:
+; CHECK-O3-NEXT:    callq no_csr@PLT
+; CHECK-O3-NEXT:  .Ltmp1:
+; CHECK-O3-NEXT:  # %bb.1: # %invoke.cont
+; CHECK-O3-NEXT:    movl $1, %eax
+; CHECK-O3-NEXT:  .LBB3_2: # %invoke.cont
+; CHECK-O3-NEXT:    addq $8, %rsp
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 56
+; CHECK-O3-NEXT:    popq %rbx
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-O3-NEXT:    popq %r12
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-O3-NEXT:    popq %r13
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-O3-NEXT:    popq %r14
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-O3-NEXT:    popq %r15
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-O3-NEXT:    popq %rbp
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-O3-NEXT:    retq
+; CHECK-O3-NEXT:  .LBB3_3: # %lpad
+; CHECK-O3-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-O3-NEXT:  .Ltmp2:
+; CHECK-O3-NEXT:    xorl %eax, %eax
+; CHECK-O3-NEXT:    jmp .LBB3_2
+entry:
+  invoke void @no_csr()
+     to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  ret i32 1
+
+lpad:
+  %0 = landingpad { i8*, i32 }
+          cleanup
+  ret i32 0
+}
+
+define void @no_csr_func() "no_callee_saved_registers" {
+; CHECK-LABEL: no_csr_func:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    callq external@PLT
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  call void @external()
+  ret void
+}
+


        


More information about the llvm-commits mailing list