[llvm] 920befb - [FastISel] Reduce spills around mem-intrinsic calls
Paul Robinson via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 9 09:49:55 PST 2020
Author: Paul Robinson
Date: 2020-11-09T09:45:14-08:00
New Revision: 920befb337ae950009d691aed31ab97089009db2
URL: https://github.com/llvm/llvm-project/commit/920befb337ae950009d691aed31ab97089009db2
DIFF: https://github.com/llvm/llvm-project/commit/920befb337ae950009d691aed31ab97089009db2.diff
LOG: [FastISel] Reduce spills around mem-intrinsic calls
FastISel generates instructions to materialize "local values" at the
top of a block, in the hope that these values could be reused within
the block. To reduce spills and restores, FastISel treats calls as
sub-block boundaries, flushing the "local value map" at each call.
This patch treats the mem* intrinsics as if they were calls, because
at O0 they are generally lowered to library calls. Eliminating these spills/restores is
actually better for debugging (especially a "continue at this line"
command), code size, stack frame size, and maybe even performance.
Differential Revision: https://reviews.llvm.org/D90877
Added:
llvm/test/CodeGen/X86/fastisel-memset-flush.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
llvm/test/CodeGen/AArch64/arm64-abi_align.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 3a5a007371d0..6cf0be5c6533 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1515,6 +1515,20 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return selectXRayCustomEvent(II);
case Intrinsic::xray_typedevent:
return selectXRayTypedEvent(II);
+
+ case Intrinsic::memcpy:
+ case Intrinsic::memcpy_element_unordered_atomic:
+ case Intrinsic::memcpy_inline:
+ case Intrinsic::memmove:
+ case Intrinsic::memmove_element_unordered_atomic:
+ case Intrinsic::memset:
+ case Intrinsic::memset_element_unordered_atomic:
+ // Flush the local value map just like we do for regular calls,
+ // to avoid excessive spills and reloads.
+ // These intrinsics mostly turn into library calls at O0; and
+ // even memcpy_inline should be treated like one for this purpose.
+ flushLocalValueMap();
+ break;
}
return fastLowerIntrinsicCall(II);
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
index b76d453c630a..ba2995affe80 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -290,13 +290,13 @@ entry:
; Space for s2 is allocated at sp
; FAST-LABEL: caller42
-; FAST: sub sp, sp, #96
-; Space for s1 is allocated at fp-24 = sp+56
-; FAST: sub x[[A:[0-9]+]], x29, #24
+; FAST: sub sp, sp, #64
+; Space for s1 is allocated at fp-24 = sp+24
+; FAST: add x[[A:[0-9]+]], sp, #24
; Call memcpy with size = 24 (0x18)
; FAST: mov {{x[0-9]+}}, #24
-; Space for s2 is allocated at sp+32
-; FAST: add x[[A:[0-9]+]], sp, #32
+; Space for s2 is allocated at sp
+; FAST: mov x[[A:[0-9]+]], sp
; FAST: bl _memcpy
%tmp = alloca %struct.s42, align 4
%tmp1 = alloca %struct.s42, align 4
@@ -339,8 +339,8 @@ entry:
; Call memcpy with size = 24 (0x18)
; FAST: mov {{x[0-9]+}}, #24
; FAST: bl _memcpy
-; Space for s2 is allocated at fp-48
-; FAST: sub x[[B:[0-9]+]], x29, #48
+; Space for s2 is allocated at sp+32
+; FAST: add x[[B:[0-9]+]], sp, #32
; Call memcpy again
; FAST: bl _memcpy
; Address of s1 is passed on stack at sp+8
diff --git a/llvm/test/CodeGen/X86/fastisel-memset-flush.ll b/llvm/test/CodeGen/X86/fastisel-memset-flush.ll
new file mode 100644
index 000000000000..5d63f5574393
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fastisel-memset-flush.ll
@@ -0,0 +1,40 @@
+; RUN: %llc_dwarf -O0 < %s | FileCheck %s
+
+define dso_local void @foo() !dbg !7 {
+entry:
+ %a = alloca i32, align 4
+ store i32 0, i32* %a, align 4, !dbg !9
+ %0 = bitcast i32* %a to i8*, !dbg !10
+ call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 -86, i64 4, i1 false), !dbg !10
+ %1 = bitcast i32* %a to i8*, !dbg !11
+ call void @other(i8* %1), !dbg !12
+ ret void, !dbg !13
+}
+; CHECK: callq memset
+; CHECK-NEXT: .loc 1 9 9
+; CHECK-NEXT: leaq
+; CHECK-NEXT: .loc 1 9 3
+; CHECK-NEXT: callq other
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+
+declare dso_local void @other(i8*)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0 (https://github.com/llvm/llvm-project eaae6fdf67e1f61599331d69a41a7dafe6199667)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "memset-test.c", directory: "/home/probinson/projects/scratch")
+!2 = !{}
+!3 = !{i32 7, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project eaae6fdf67e1f61599331d69a41a7dafe6199667)"}
+!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, type: !8, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 6, column: 7, scope: !7)
+!10 = !DILocation(line: 8, column: 3, scope: !7)
+!11 = !DILocation(line: 9, column: 9, scope: !7)
+!12 = !DILocation(line: 9, column: 3, scope: !7)
+!13 = !DILocation(line: 10, column: 1, scope: !7)
More information about the llvm-commits mailing list