[PATCH] D35035: [InstCombine] Prevent memcpy generation for small data size
DIVYA SHANMUGHAN via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 14 08:42:34 PDT 2017
DIVYA updated this revision to Diff 110982.
https://reviews.llvm.org/D35035
Files:
lib/Transforms/InstCombine/InstCombineCalls.cpp
test/DebugInfo/X86/array2.ll
test/Transforms/InstCombine/memcpy-to-load.ll
Index: test/Transforms/InstCombine/memcpy-to-load.ll
===================================================================
--- test/Transforms/InstCombine/memcpy-to-load.ll
+++ test/Transforms/InstCombine/memcpy-to-load.ll
@@ -77,9 +77,36 @@
}
define void @copy_16_bytes(i8* %d, i8* %s) {
-; ALL-LABEL: @copy_16_bytes(
-; ALL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[D:%.*]], i8* [[S:%.*]], i32 16, i32 1, i1 false)
-; ALL-NEXT: ret void
+
+; If there is no datalayout, a 4-byte largest legal integer type is assumed, so only a memcpy whose size is a power of 2 and at most 8 bytes is expanded inline with load/store; this 16-byte copy stays a call
+
+; NODL-LABEL: @copy_16_bytes(
+; NODL-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[D:%.*]], i8* [[S:%.*]], i32 16, i32 1, i1 false)
+; NODL-NEXT: ret void
+;
+; For a datalayout whose largest legal integer type is 4 bytes, only a memcpy whose size is a power of 2 and at most 8 bytes is expanded inline with load/store; this 16-byte copy stays a call
+;
+; I32-LABEL: @copy_16_bytes(
+; I32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[D:%.*]], i8* [[S:%.*]], i32 16, i32 1, i1 false)
+; I32-NEXT: ret void
+;
+; For a datalayout whose largest legal integer type is 8 bytes, a memcpy whose size is a power of 2 and at most 16 bytes is expanded inline with load/store
+;
+; I64-LABEL: @copy_16_bytes(
+; I64-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i128*
+; I64-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i128*
+; I64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]], align 1
+; I64-NEXT: store i128 [[TMP3]], i128* [[TMP2]], align 1
+; I64-NEXT: ret void
+;
+; For a datalayout whose largest legal integer type is 16 bytes, a memcpy whose size is a power of 2 and at most 32 bytes is expanded inline with load/store
+;
+; I128-LABEL: @copy_16_bytes(
+; I128-NEXT: [[TMP1:%.*]] = bitcast i8* [[S:%.*]] to i128*
+; I128-NEXT: [[TMP2:%.*]] = bitcast i8* [[D:%.*]] to i128*
+; I128-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]], align 1
+; I128-NEXT: store i128 [[TMP3]], i128* [[TMP2]], align 1
+; I128-NEXT: ret void
;
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 16, i32 1, i1 false)
ret void
Index: test/DebugInfo/X86/array2.ll
===================================================================
--- test/DebugInfo/X86/array2.ll
+++ test/DebugInfo/X86/array2.ll
@@ -16,9 +16,8 @@
; Test that we correctly lower dbg.declares for arrays.
;
; CHECK: define i32 @main
-; CHECK: call void @llvm.dbg.value(metadata i32 42, i64 0, metadata ![[ARRAY:[0-9]+]], metadata ![[EXPR:[0-9]+]])
-; CHECK: ![[ARRAY]] = !DILocalVariable(name: "array",{{.*}} line: 6
-; CHECK: ![[EXPR]] = !DIExpression(DW_OP_LLVM_fragment, 0, 32)
+; CHECK: tail call void @llvm.dbg.value(metadata i32 [[ARGC:%.*]], i64 0, metadata !22, metadata !12), !dbg !23
+; CHECK: tail call void @llvm.dbg.value(metadata i8** [[ARGV:%.*]], i64 0, metadata !24, metadata !12), !dbg !23
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -182,8 +182,6 @@
return MI;
}
- // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
- // load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
if (!MemOpLength) return nullptr;
@@ -194,8 +192,16 @@
uint64_t Size = MemOpLength->getLimitedValue();
assert(Size && "0-sized memory transferring should be removed already.");
- if (Size > 8 || (Size&(Size-1)))
- return nullptr; // If not 1/2/4/8 bytes, exit.
+
+ // Since we don't have perfect knowledge here, make some assumptions: assume
+ // a memcpy may be expanded inline when its size is at most twice the size of
+ // the largest legal integer type (taken to be 32 bits if none is known).
+ unsigned LargestInt = DL.getLargestLegalIntTypeSizeInBits();
+ if (LargestInt == 0)
+ LargestInt = 32;
+
+ if (Size > 2*LargestInt/8 || (Size&(Size-1)))
+ return nullptr;
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D35035.110982.patch
Type: text/x-patch
Size: 4186 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170814/35b68c0b/attachment-0001.bin>
More information about the llvm-commits
mailing list