[PATCH] D86883: [X86] Add support for using fast short rep mov for memcpy lowering.

Mon Aug 31 10:31:27 PDT 2020

yamauchi created this revision.
yamauchi added reviewers: davidxl, craig.topper.
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.
yamauchi requested review of this revision.

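The new lowering is disabled by default; it is enabled with the hidden `-x86-use-fsrm-for-memcpy` option and only takes effect on subtargets that have the FSRM feature.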


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D86883

Files:
  llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
  llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll


Index: llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=-fsrm < %s -o - | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=+fsrm < %s -o - | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @test1(i8* %a, i8* %b, i64 %s) nounwind {
+; NOFSRM-LABEL: test1
+; NOFSRM:       # %bb.0:
+; NOFSRM:         jmp memcpy
+;
+; FSRM-LABEL: test1
+; FSRM:       # %bb.0:
+; FSRM-NEXT:    movq %rdx, %rcx
+; FSRM-NEXT:    rep;movsb (%rsi), %es:(%rdi)
+; FSRM-NEXT:    retq
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %s, i1 0)
+  ret void
+}
Index: llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -24,6 +24,10 @@
 
 #define DEBUG_TYPE "x86-selectiondag-info"
 
+static cl::opt<bool>
+    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
+                     cl::desc("Use fast short rep mov in memcpy lowering"));
+
 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
     SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
   // We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -191,7 +195,9 @@
   const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
 
   SDValue InFlag;
-  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
+  Chain = DAG.getCopyToReg(
+      Chain, dl, CX,
+      DAG.getZExtOrTrunc(Size, dl, Use64BitRegs ? MVT::i64 : MVT::i32), InFlag);
   InFlag = Chain.getValue(1);
   Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
   InFlag = Chain.getValue(1);
@@ -306,6 +312,10 @@
   const X86Subtarget &Subtarget =
       DAG.getMachineFunction().getSubtarget<X86Subtarget>();
 
+  // If enabled and available, use fast short rep mov.
+  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
+    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
   /// Handle constant sizes,
   if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
     return emitConstantSizeRepmov(


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D86883.288986.patch
Type: text/x-patch
Size: 2859 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200831/7d89ccd0/attachment.bin>