[llvm] dc33679 - [SelectionDAG] Handle bzero/memset libcalls globally instead of per target

Guillaume Chatelet via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 9 01:47:04 PDT 2022


Author: Guillaume Chatelet
Date: 2022-06-09T08:34:55Z
New Revision: dc3367970e9bdd9ce2a47efd080ed493ed4e8f4b

URL: https://github.com/llvm/llvm-project/commit/dc3367970e9bdd9ce2a47efd080ed493ed4e8f4b
DIFF: https://github.com/llvm/llvm-project/commit/dc3367970e9bdd9ce2a47efd080ed493ed4e8f4b.diff

LOG: [SelectionDAG] Handle bzero/memset libcalls globally instead of per target

Differential Revision: https://reviews.llvm.org/D127279

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
    llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
    llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
    llvm/test/CodeGen/AArch64/arm64_32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a917bf73424a2..efb86e154d684 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7345,30 +7345,48 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
   checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
 
   // Emit a library call.
-  TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
-  Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
-  Args.push_back(Entry);
-  Entry.Node = Src;
-  Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
-  Args.push_back(Entry);
-  Entry.Node = Size;
-  Entry.Ty = getDataLayout().getIntPtrType(*getContext());
-  Args.push_back(Entry);
+  auto &Ctx = *getContext();
+  const auto& DL = getDataLayout();
 
-  // FIXME: pass in SDLoc
   TargetLowering::CallLoweringInfo CLI(*this);
-  CLI.setDebugLoc(dl)
-      .setChain(Chain)
-      .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
-                    Dst.getValueType().getTypeForEVT(*getContext()),
-                    getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
-                                      TLI->getPointerTy(getDataLayout())),
-                    std::move(Args))
-      .setDiscardResult()
-      .setTailCall(isTailCall);
+  // FIXME: pass in SDLoc
+  CLI.setDebugLoc(dl).setChain(Chain);
+
+  ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src);
+  const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero();
+  const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO);
+
+  // Helper function to create an Entry from Node and Type.
+  const auto CreateEntry = [](SDValue Node, Type *Ty) {
+    TargetLowering::ArgListEntry Entry;
+    Entry.Node = Node;
+    Entry.Ty = Ty;
+    return Entry;
+  };
 
-  std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+  // If zeroing out and bzero is present, use it.
+  if (SrcIsZero && BzeroName) {
+    TargetLowering::ArgListTy Args;
+    Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+    Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+    CLI.setLibCallee(
+        TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx),
+        getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args));
+  } else {
+    TargetLowering::ArgListTy Args;
+    Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx)));
+    Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx)));
+    Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx)));
+    CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+                     Dst.getValueType().getTypeForEVT(Ctx),
+                     getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+                                       TLI->getPointerTy(DL)),
+                     std::move(Args));
+  }
+
+  CLI.setDiscardResult().setTailCall(isTailCall);
+
+  std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI);
   return CallResult.second;
 }
 

diff  --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 893269c1a7efd..07494c42c6477 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -100,38 +100,6 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
     return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size,
                     Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{});
   }
-
-  // Check to see if there is a specialized entry-point for memory zeroing.
-  ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
-  ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
-  const char *bzeroName =
-      (V && V->isZero())
-          ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
-          : nullptr;
-  // For small size (< 256), it is not beneficial to use bzero
-  // instead of memset.
-  if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) {
-    const AArch64TargetLowering &TLI = *STI.getTargetLowering();
-
-    EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
-    Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext());
-    TargetLowering::ArgListTy Args;
-    TargetLowering::ArgListEntry Entry;
-    Entry.Node = Dst;
-    Entry.Ty = IntPtrTy;
-    Args.push_back(Entry);
-    Entry.Node = Size;
-    Args.push_back(Entry);
-    TargetLowering::CallLoweringInfo CLI(DAG);
-    CLI.setDebugLoc(dl)
-        .setChain(Chain)
-        .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
-                      DAG.getExternalSymbol(bzeroName, IntPtr),
-                      std::move(Args))
-        .setDiscardResult();
-    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-    return CallResult.second;
-  }
   return SDValue();
 }
 

diff  --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 5e59081c63b08..e51d05fab5abd 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -67,40 +67,8 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
   // The libc version is likely to be faster for these cases. It can use the
   // address value and run time information about the CPU.
   if (Alignment < Align(4) || !ConstantSize ||
-      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
-    // Check to see if there is a specialized entry-point for memory zeroing.
-    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
-
-    if (const char *bzeroName =
-            (ValC && ValC->isZero())
-                ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
-                : nullptr) {
-      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
-      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
-      TargetLowering::ArgListTy Args;
-      TargetLowering::ArgListEntry Entry;
-      Entry.Node = Dst;
-      Entry.Ty = IntPtrTy;
-      Args.push_back(Entry);
-      Entry.Node = Size;
-      Args.push_back(Entry);
-
-      TargetLowering::CallLoweringInfo CLI(DAG);
-      CLI.setDebugLoc(dl)
-          .setChain(Chain)
-          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
-                        DAG.getExternalSymbol(bzeroName, IntPtr),
-                        std::move(Args))
-          .setDiscardResult();
-
-      std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
-      return CallResult.second;
-    }
-
-    // Otherwise have the target-independent code call memset.
+      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) 
     return SDValue();
-  }
 
   uint64_t SizeVal = ConstantSize->getZExtValue();
   SDValue InFlag;

diff  --git a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
index ab819a42729ac..6be7b3822a711 100644
--- a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll
@@ -1,48 +1,33 @@
-; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-apple-darwin -o - \
-; RUN: | FileCheck --check-prefixes=CHECK,CHECK-DARWIN %s
-; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu -o - | \
-; RUN:   FileCheck --check-prefixes=CHECK,CHECK-LINUX %s
+; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-apple-darwin -o - | FileCheck %s --check-prefix=DARWIN
+; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu    -o - | FileCheck %s --check-prefix=LINUX
 ; <rdar://problem/14199482> ARM64: Calls to bzero() replaced with calls to memset()
 
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+
 ; CHECK-LABEL: fct1:
-; For small size (<= 256), we do not change memset to bzero.
-; CHECK-DARWIN: {{b|bl}} _memset
-; CHECK-LINUX: {{b|bl}} memset
+; Constant size memset to zero.
+; DARWIN: {{b|bl}} _bzero
+; LINUX: {{b|bl}} memset
 define void @fct1(i8* nocapture %ptr) minsize {
-entry:
   tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false)
   ret void
 }
 
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
-
-; CHECK-LABEL: fct2:
-; When the size is bigger than 256, change into bzero.
-; CHECK-DARWIN: {{b|bl}} _bzero
-; CHECK-LINUX: {{b|bl}} memset
-define void @fct2(i8* nocapture %ptr) minsize {
-entry:
-  tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false)
-  ret void
-}
-
 ; CHECK-LABEL: fct3:
-; For unknown size, change to bzero.
-; CHECK-DARWIN: {{b|bl}} _bzero
-; CHECK-LINUX: {{b|bl}} memset
+; Variable size memset to zero.
+; DARWIN: {{b|bl}} _bzero
+; LINUX: {{b|bl}} memset
 define void @fct3(i8* nocapture %ptr, i32 %unknown) minsize {
-entry:
   %conv = sext i32 %unknown to i64
   tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false)
   ret void
 }
 
 ; CHECK-LABEL: fct4:
-; Size <= 256, no change.
-; CHECK-DARWIN: {{b|bl}} _memset
-; CHECK-LINUX: {{b|bl}} memset
+; Variable size checked memset to zero.
+; DARWIN: {{b|bl}} _bzero
+; LINUX: {{b|bl}} memset
 define void @fct4(i8* %ptr) minsize {
-entry:
   %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
   %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp)
   ret void
@@ -52,23 +37,11 @@ declare i8* @__memset_chk(i8*, i32, i64, i64)
 
 declare i64 @llvm.objectsize.i64(i8*, i1)
 
-; CHECK-LABEL: fct5:
-; Size > 256, change.
-; CHECK-DARWIN: {{b|bl}} _bzero
-; CHECK-LINUX: {{b|bl}} memset
-define void @fct5(i8* %ptr) minsize {
-entry:
-  %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
-  %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp)
-  ret void
-}
-
 ; CHECK-LABEL: fct6:
 ; Size = unknown, change.
-; CHECK-DARWIN: {{b|bl}} _bzero
-; CHECK-LINUX: {{b|bl}} memset
+; DARWIN: {{b|bl}} _bzero
+; LINUX: {{b|bl}} memset
 define void @fct6(i8* %ptr, i32 %unknown) minsize {
-entry:
   %conv = sext i32 %unknown to i64
   %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
   %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp)
@@ -80,10 +53,9 @@ entry:
 
 ; CHECK-LABEL: fct7:
 ; memset with something that is not a zero, no change.
-; CHECK-DARWIN: {{b|bl}} _memset
-; CHECK-LINUX: {{b|bl}} memset
+; DARWIN: {{b|bl}} _memset
+; LINUX: {{b|bl}} memset
 define void @fct7(i8* %ptr) minsize {
-entry:
   %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
   %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp)
   ret void
@@ -91,10 +63,9 @@ entry:
 
 ; CHECK-LABEL: fct8:
 ; memset with something that is not a zero, no change.
-; CHECK-DARWIN: {{b|bl}} _memset
-; CHECK-LINUX: {{b|bl}} memset
+; DARWIN: {{b|bl}} _memset
+; LINUX: {{b|bl}} memset
 define void @fct8(i8* %ptr) minsize {
-entry:
   %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
   %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp)
   ret void
@@ -102,10 +73,9 @@ entry:
 
 ; CHECK-LABEL: fct9:
 ; memset with something that is not a zero, no change.
-; CHECK-DARWIN: {{b|bl}} _memset
-; CHECK-LINUX: {{b|bl}} memset
+; DARWIN: {{b|bl}} _memset
+; LINUX: {{b|bl}} memset
 define void @fct9(i8* %ptr, i32 %unknown) minsize {
-entry:
   %conv = sext i32 %unknown to i64
   %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)
   %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp)

diff  --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll
index b452a9ee419f3..fbf12e80b6b53 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32.ll
@@ -732,11 +732,26 @@ define { [18 x i8] }* @test_gep_nonpow2({ [18 x i8] }* %a0, i32 %a1) {
   ret { [18 x i8] }* %tmp0
 }
 
+define void @test_memset(i64 %in, i8 %value)  {
+; CHECK-LABEL: test_memset:
+; CHECK-DAG: and x8, x0, #0xffffffff
+; CHECK-DAG: lsr x2, x0, #32
+; CHECK-DAG: mov x0, x8
+; CHECK: b _memset
+
+  %ptr.i32 = trunc i64 %in to i32
+  %size.64 = lshr i64 %in, 32
+  %size = trunc i64 %size.64 to i32
+  %ptr = inttoptr i32 %ptr.i32 to i8*
+  tail call void @llvm.memset.p0i8.i32(i8* align 4 %ptr, i8 %value, i32 %size, i1 false)
+  ret void
+}
+
 define void @test_bzero(i64 %in)  {
 ; CHECK-LABEL: test_bzero:
 ; CHECK-DAG: lsr x1, x0, #32
 ; CHECK-DAG: and x0, x0, #0xffffffff
-; CHECK: bl _bzero
+; CHECK: b _bzero
 
   %ptr.i32 = trunc i64 %in to i32
   %size.64 = lshr i64 %in, 32


        


More information about the llvm-commits mailing list