[llvm] [X86] Use AlwaysInline to determine whether to emit code or bail when inlining is determined to be unprofitable (PR #87003)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 28 13:25:20 PDT 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/87003

>From 1e6fe82e2a91e692d159099d560c18bfe0104062 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Thu, 28 Mar 2024 16:19:34 -0400
Subject: [PATCH] [X86] Use AlwaysInline to determine whether to emit code or
 bail when inlining is determined to be unprofitable

Pass AlwaysInline = true to the getMemset call that handles the remaining bytes, since by the time we reach it inlining has already been determined to be profitable.

This matches how getMemcpy works.
---
 llvm/lib/Target/X86/X86SelectionDAGInfo.cpp   |   8 +-
 .../CodeGen/X86/memset-vs-memset-inline.ll    | 135 +-----------------
 2 files changed, 9 insertions(+), 134 deletions(-)

diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 7c630a2b0da080..50d273e69ada44 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -66,8 +66,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   // If not DWORD aligned or size is more than the threshold, call the library.
   // The libc version is likely to be faster for these cases. It can use the
   // address value and run time information about the CPU.
-  if (Alignment < Align(4) || !ConstantSize ||
-      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) 
+  if (!ConstantSize ||
+      (!AlwaysInline &&
+       (Alignment < Align(4) ||
+        ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())))
     return SDValue();
 
   uint64_t SizeVal = ConstantSize->getZExtValue();
@@ -142,7 +144,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
                       DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                   DAG.getConstant(Offset, dl, AddrVT)),
                       Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
-                      isVolatile, AlwaysInline,
+                      isVolatile, /* AlwaysInline */ true,
                       /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
   }
 
diff --git a/llvm/test/CodeGen/X86/memset-vs-memset-inline.ll b/llvm/test/CodeGen/X86/memset-vs-memset-inline.ll
index b8fdd936b43895..16022c6cbb3934 100644
--- a/llvm/test/CodeGen/X86/memset-vs-memset-inline.ll
+++ b/llvm/test/CodeGen/X86/memset-vs-memset-inline.ll
@@ -28,137 +28,10 @@ define void @regular_memset_calls_external_function(ptr %a, i8 %value) nounwind
 define void @inlined_set_doesnt_call_external_function(ptr %a, i8 %value) nounwind {
 ; CHECK-LABEL: inlined_set_doesnt_call_external_function:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzbl %sil, %ecx
-; CHECK-NEXT:    movabsq $72340172838076673, %rax # imm = 0x101010101010101
-; CHECK-NEXT:    imulq %rcx, %rax
-; CHECK-NEXT:    movq %rax, 1016(%rdi)
-; CHECK-NEXT:    movq %rax, 1008(%rdi)
-; CHECK-NEXT:    movq %rax, 1000(%rdi)
-; CHECK-NEXT:    movq %rax, 992(%rdi)
-; CHECK-NEXT:    movq %rax, 984(%rdi)
-; CHECK-NEXT:    movq %rax, 976(%rdi)
-; CHECK-NEXT:    movq %rax, 968(%rdi)
-; CHECK-NEXT:    movq %rax, 960(%rdi)
-; CHECK-NEXT:    movq %rax, 952(%rdi)
-; CHECK-NEXT:    movq %rax, 944(%rdi)
-; CHECK-NEXT:    movq %rax, 936(%rdi)
-; CHECK-NEXT:    movq %rax, 928(%rdi)
-; CHECK-NEXT:    movq %rax, 920(%rdi)
-; CHECK-NEXT:    movq %rax, 912(%rdi)
-; CHECK-NEXT:    movq %rax, 904(%rdi)
-; CHECK-NEXT:    movq %rax, 896(%rdi)
-; CHECK-NEXT:    movq %rax, 888(%rdi)
-; CHECK-NEXT:    movq %rax, 880(%rdi)
-; CHECK-NEXT:    movq %rax, 872(%rdi)
-; CHECK-NEXT:    movq %rax, 864(%rdi)
-; CHECK-NEXT:    movq %rax, 856(%rdi)
-; CHECK-NEXT:    movq %rax, 848(%rdi)
-; CHECK-NEXT:    movq %rax, 840(%rdi)
-; CHECK-NEXT:    movq %rax, 832(%rdi)
-; CHECK-NEXT:    movq %rax, 824(%rdi)
-; CHECK-NEXT:    movq %rax, 816(%rdi)
-; CHECK-NEXT:    movq %rax, 808(%rdi)
-; CHECK-NEXT:    movq %rax, 800(%rdi)
-; CHECK-NEXT:    movq %rax, 792(%rdi)
-; CHECK-NEXT:    movq %rax, 784(%rdi)
-; CHECK-NEXT:    movq %rax, 776(%rdi)
-; CHECK-NEXT:    movq %rax, 768(%rdi)
-; CHECK-NEXT:    movq %rax, 760(%rdi)
-; CHECK-NEXT:    movq %rax, 752(%rdi)
-; CHECK-NEXT:    movq %rax, 744(%rdi)
-; CHECK-NEXT:    movq %rax, 736(%rdi)
-; CHECK-NEXT:    movq %rax, 728(%rdi)
-; CHECK-NEXT:    movq %rax, 720(%rdi)
-; CHECK-NEXT:    movq %rax, 712(%rdi)
-; CHECK-NEXT:    movq %rax, 704(%rdi)
-; CHECK-NEXT:    movq %rax, 696(%rdi)
-; CHECK-NEXT:    movq %rax, 688(%rdi)
-; CHECK-NEXT:    movq %rax, 680(%rdi)
-; CHECK-NEXT:    movq %rax, 672(%rdi)
-; CHECK-NEXT:    movq %rax, 664(%rdi)
-; CHECK-NEXT:    movq %rax, 656(%rdi)
-; CHECK-NEXT:    movq %rax, 648(%rdi)
-; CHECK-NEXT:    movq %rax, 640(%rdi)
-; CHECK-NEXT:    movq %rax, 632(%rdi)
-; CHECK-NEXT:    movq %rax, 624(%rdi)
-; CHECK-NEXT:    movq %rax, 616(%rdi)
-; CHECK-NEXT:    movq %rax, 608(%rdi)
-; CHECK-NEXT:    movq %rax, 600(%rdi)
-; CHECK-NEXT:    movq %rax, 592(%rdi)
-; CHECK-NEXT:    movq %rax, 584(%rdi)
-; CHECK-NEXT:    movq %rax, 576(%rdi)
-; CHECK-NEXT:    movq %rax, 568(%rdi)
-; CHECK-NEXT:    movq %rax, 560(%rdi)
-; CHECK-NEXT:    movq %rax, 552(%rdi)
-; CHECK-NEXT:    movq %rax, 544(%rdi)
-; CHECK-NEXT:    movq %rax, 536(%rdi)
-; CHECK-NEXT:    movq %rax, 528(%rdi)
-; CHECK-NEXT:    movq %rax, 520(%rdi)
-; CHECK-NEXT:    movq %rax, 512(%rdi)
-; CHECK-NEXT:    movq %rax, 504(%rdi)
-; CHECK-NEXT:    movq %rax, 496(%rdi)
-; CHECK-NEXT:    movq %rax, 488(%rdi)
-; CHECK-NEXT:    movq %rax, 480(%rdi)
-; CHECK-NEXT:    movq %rax, 472(%rdi)
-; CHECK-NEXT:    movq %rax, 464(%rdi)
-; CHECK-NEXT:    movq %rax, 456(%rdi)
-; CHECK-NEXT:    movq %rax, 448(%rdi)
-; CHECK-NEXT:    movq %rax, 440(%rdi)
-; CHECK-NEXT:    movq %rax, 432(%rdi)
-; CHECK-NEXT:    movq %rax, 424(%rdi)
-; CHECK-NEXT:    movq %rax, 416(%rdi)
-; CHECK-NEXT:    movq %rax, 408(%rdi)
-; CHECK-NEXT:    movq %rax, 400(%rdi)
-; CHECK-NEXT:    movq %rax, 392(%rdi)
-; CHECK-NEXT:    movq %rax, 384(%rdi)
-; CHECK-NEXT:    movq %rax, 376(%rdi)
-; CHECK-NEXT:    movq %rax, 368(%rdi)
-; CHECK-NEXT:    movq %rax, 360(%rdi)
-; CHECK-NEXT:    movq %rax, 352(%rdi)
-; CHECK-NEXT:    movq %rax, 344(%rdi)
-; CHECK-NEXT:    movq %rax, 336(%rdi)
-; CHECK-NEXT:    movq %rax, 328(%rdi)
-; CHECK-NEXT:    movq %rax, 320(%rdi)
-; CHECK-NEXT:    movq %rax, 312(%rdi)
-; CHECK-NEXT:    movq %rax, 304(%rdi)
-; CHECK-NEXT:    movq %rax, 296(%rdi)
-; CHECK-NEXT:    movq %rax, 288(%rdi)
-; CHECK-NEXT:    movq %rax, 280(%rdi)
-; CHECK-NEXT:    movq %rax, 272(%rdi)
-; CHECK-NEXT:    movq %rax, 264(%rdi)
-; CHECK-NEXT:    movq %rax, 256(%rdi)
-; CHECK-NEXT:    movq %rax, 248(%rdi)
-; CHECK-NEXT:    movq %rax, 240(%rdi)
-; CHECK-NEXT:    movq %rax, 232(%rdi)
-; CHECK-NEXT:    movq %rax, 224(%rdi)
-; CHECK-NEXT:    movq %rax, 216(%rdi)
-; CHECK-NEXT:    movq %rax, 208(%rdi)
-; CHECK-NEXT:    movq %rax, 200(%rdi)
-; CHECK-NEXT:    movq %rax, 192(%rdi)
-; CHECK-NEXT:    movq %rax, 184(%rdi)
-; CHECK-NEXT:    movq %rax, 176(%rdi)
-; CHECK-NEXT:    movq %rax, 168(%rdi)
-; CHECK-NEXT:    movq %rax, 160(%rdi)
-; CHECK-NEXT:    movq %rax, 152(%rdi)
-; CHECK-NEXT:    movq %rax, 144(%rdi)
-; CHECK-NEXT:    movq %rax, 136(%rdi)
-; CHECK-NEXT:    movq %rax, 128(%rdi)
-; CHECK-NEXT:    movq %rax, 120(%rdi)
-; CHECK-NEXT:    movq %rax, 112(%rdi)
-; CHECK-NEXT:    movq %rax, 104(%rdi)
-; CHECK-NEXT:    movq %rax, 96(%rdi)
-; CHECK-NEXT:    movq %rax, 88(%rdi)
-; CHECK-NEXT:    movq %rax, 80(%rdi)
-; CHECK-NEXT:    movq %rax, 72(%rdi)
-; CHECK-NEXT:    movq %rax, 64(%rdi)
-; CHECK-NEXT:    movq %rax, 56(%rdi)
-; CHECK-NEXT:    movq %rax, 48(%rdi)
-; CHECK-NEXT:    movq %rax, 40(%rdi)
-; CHECK-NEXT:    movq %rax, 32(%rdi)
-; CHECK-NEXT:    movq %rax, 24(%rdi)
-; CHECK-NEXT:    movq %rax, 16(%rdi)
-; CHECK-NEXT:    movq %rax, 8(%rdi)
-; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movl $1024, %ecx # imm = 0x400
+; CHECK-NEXT:    # kill: def $al killed $al killed $eax
+; CHECK-NEXT:    rep;stosb %al, %es:(%rdi)
 ; CHECK-NEXT:    retq
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1024, i1 0)
   ret void



More information about the llvm-commits mailing list