[PATCH] D45653: Enable sibling-call optimization for functions returning structs

Ivan Sorokin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 14 07:39:46 PDT 2018


sorokin created this revision.

This is an attempt to fix issue PR28417.

Currently clang doesn't do sibling call optimization when a function returns a struct by value:

  mytype f();
  
  mytype g()
  {
    return f();
  }

Generated IR (-O2):

  define void @g()(%struct.mytype* noalias sret) local_unnamed_addr #0 !dbg !7 {
    tail call void @f()(%struct.mytype* sret %0), !dbg !21
    ret void, !dbg !22
  }

Generated code:

  g(): # @g()
    push rbx
    mov rbx, rdi
    call f()
    mov rax, rbx
    pop rbx
    ret

On the other hand, clang can do sibling call optimization when the struct is passed by pointer:

  struct mytype
  {
      char const *a, *b, *c, *d;
  };
  
  void f(mytype*);
  
  void g(mytype* a)
  {
      return f(a);
  }

Generated IR (-O2):

  define void @g(mytype*)(%struct.mytype*) local_unnamed_addr #0 !dbg !7 {
    call void @llvm.dbg.value(metadata %struct.mytype* %0, metadata !13, metadata !DIExpression()), !dbg !14
    tail call void @f(mytype*)(%struct.mytype* %0), !dbg !15
    ret void, !dbg !16
  }

Generated code:

  g(mytype*): # @g(mytype*)
    jmp f(mytype*) # TAILCALL

The difference between these two IRs is the presence of the sret attribute. I believe tail call optimization is possible in the first case too. The reason why tail call optimization is not performed is that X86TargetLowering::IsEligibleForTailCallOptimization has the following if statement:

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
      return false;

Unfortunately, when this if statement was added, no explanation was given for why it is needed here. The corresponding test case is marked rdar://7726868, but I cannot see what it is about.

As far as I understand, a sibling call can be performed when both caller and callee are marked sret. The case where caller and callee have mismatched sret specifications is trickier.

As I understand it, the sret attribute serves two purposes:

1. (from documentation) This indicates that the pointer parameter specifies the address of a structure that is the return value of the function in the source program. This pointer must be guaranteed by the caller to be valid: loads and stores to the structure may be assumed by the callee not to trap and to be properly aligned. This is not a valid attribute for return values.
2. (from Itanium ABI) callee function should return its sret argument in RAX.

From (2), an sret caller cannot sibling-call a non-sret callee. The reverse is allowed. Based on this, I updated X86TargetLowering::IsEligibleForTailCallOptimization.


Repository:
  rL LLVM

https://reviews.llvm.org/D45653

Files:
  lib/Target/X86/X86ISelLowering.cpp
  test/CodeGen/X86/sibcall.ll


Index: test/CodeGen/X86/sibcall.ll
===================================================================
--- test/CodeGen/X86/sibcall.ll
+++ test/CodeGen/X86/sibcall.ll
@@ -307,12 +307,10 @@
 ; 32: retl $4
 
 ; 64-LABEL: t15:
-; 64: callq {{_?}}f
-; 64: retq
+; 64: jmp {{_?}}f
 
 ; X32ABI-LABEL: t15:
-; X32ABI: callq {{_?}}f
-; X32ABI: retq
+; X32ABI: jmp {{_?}}f
   tail call fastcc void @f(%struct.foo* noalias sret %agg.result) nounwind
   ret void
 }
@@ -408,3 +406,49 @@
 }
 
 declare fastcc double @foo20(double) nounwind
+
+; bug 28417
+define fastcc void @t21_sret_to_sret(%struct.foo* noalias sret %agg.result) nounwind  {
+; 32-LABEL: t21_sret_to_sret:
+; 32: jmp {{_?}}t21_f_sret
+
+; 64-LABEL: t21_sret_to_sret:
+; 64: jmp {{_?}}t21_f_sret
+
+; X32ABI-LABEL: t21_sret_to_sret:
+; X32ABI: jmp {{_?}}t21_f_sret
+  tail call fastcc void @t21_f_sret(%struct.foo* noalias sret %agg.result) nounwind
+  ret void
+}
+
+define fastcc void @t21_sret_to_non_sret(%struct.foo* noalias sret %agg.result) nounwind  {
+; 32-LABEL: t21_sret_to_non_sret:
+; 32: calll {{_?}}t21_f_non_sret
+; 32: retl
+
+; 64-LABEL: t21_sret_to_non_sret:
+; 64: callq {{_?}}t21_f_non_sret
+; 64: retq
+
+; X32ABI-LABEL: t21_sret_to_non_sret:
+; X32ABI: callq {{_?}}t21_f_non_sret
+; X32ABI: retq
+  tail call fastcc void @t21_f_non_sret(%struct.foo* %agg.result) nounwind
+  ret void
+}
+
+define fastcc void @t21_non_sret_to_sret(%struct.foo* %agg.result) nounwind  {
+; 32-LABEL: t21_non_sret_to_sret:
+; 32: jmp {{_?}}t21_f_sret
+
+; 64-LABEL: t21_non_sret_to_sret:
+; 64: jmp {{_?}}t21_f_sret
+
+; X32ABI-LABEL: t21_non_sret_to_sret:
+; X32ABI: jmp {{_?}}t21_f_sret
+  tail call fastcc void @t21_f_sret(%struct.foo* noalias sret %agg.result) nounwind
+  ret void
+}
+
+declare fastcc void @t21_f_sret(%struct.foo* noalias sret) nounwind
+declare fastcc void @t21_f_non_sret(%struct.foo*) nounwind
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -4155,9 +4155,9 @@
   if (RegInfo->needsStackRealignment(MF))
     return false;
 
-  // Also avoid sibcall optimization if either caller or callee uses struct
-  // return semantics.
-  if (isCalleeStructRet || isCallerStructRet)
+  // Struct-return functions need to return its argument in RAX, so they can not
+  // sibcall non-struct-return functions.
+  if (!isCalleeStructRet && isCallerStructRet)
     return false;
 
   // Do not sibcall optimize vararg calls unless all arguments are passed via


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D45653.142513.patch
Type: text/x-patch
Size: 2575 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180414/897562fa/attachment.bin>


More information about the llvm-commits mailing list