[llvm] b6043f9 - [RA] Disable split around hint register if optimize for size (#68619)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 11 14:57:20 PDT 2023
Author: weiguozhi
Date: 2023-10-11T14:57:15-07:00
New Revision: b6043f98673e33f17564dd240be3878c61e22333
URL: https://github.com/llvm/llvm-project/commit/b6043f98673e33f17564dd240be3878c61e22333
DIFF: https://github.com/llvm/llvm-project/commit/b6043f98673e33f17564dd240be3878c61e22333.diff
LOG: [RA] Disable split around hint register if optimize for size (#68619)
Splitting a virtual register around its hint register may generate COPY
instructions in multiple cold basic blocks and increase code size, so
disable this split when the function is optimized for size.
Added:
llvm/test/CodeGen/X86/no-split-size.ll
Modified:
llvm/lib/CodeGen/RegAllocGreedy.cpp
llvm/test/CodeGen/ARM/thumb2-size-opt.ll
llvm/test/DebugInfo/ARM/sdag-split-arg.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4d476924a7dbf7b..349d8b0975f3a10 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1217,6 +1217,12 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
const LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs,
AllocationOrder &Order) {
+ // Splitting VirtReg may generate COPY instructions in multiple cold basic
+ // blocks and increase code size, so we avoid it when the function is
+ // optimized for size.
+ if (MF->getFunction().hasOptSize())
+ return false;
+
// Don't allow repeated splitting as a safe guard against looping.
if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
return false;
diff --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
index 8cf7a702e8ed54d..f9f29fc064a20ce 100644
--- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
+++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
@@ -85,8 +85,8 @@ entry:
define i32 @bundled_instruction(ptr %addr, ptr %addr2, i1 %tst) minsize {
; CHECK-LABEL: bundled_instruction:
-; CHECK: iteee ne
-; CHECK: ldmeq r2!, {{{r[0-9]+}}}
+; CHECK: itee ne
+; CHECK: ldmeq r3!, {{{r[0-9]+}}}
br i1 %tst, label %true, label %false
true:
diff --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
new file mode 100644
index 000000000000000..c1f93acd77dee27
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
+; in cold blocks.
+
+define i64 @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) optsize {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -48
+; CHECK-NEXT: .cfi_offset %r12, -40
+; CHECK-NEXT: .cfi_offset %r13, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %r15, -16
+; CHECK-NEXT: movq %r9, %r14
+; CHECK-NEXT: movq %r8, %rbx
+; CHECK-NEXT: movq %rcx, %r12
+; CHECK-NEXT: movq %rdx, %r15
+; CHECK-NEXT: movq %rsi, %r13
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # %bb.2: # %if.else
+; CHECK-NEXT: testq %r13, %r13
+; CHECK-NEXT: movq %r15, %rax
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: .LBB0_4: # %if.end
+; CHECK-NEXT: addq %r13, %rax
+; CHECK-NEXT: addq %r12, %r15
+; CHECK-NEXT: addq %rax, %r15
+; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: addq %r15, %rbx
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1: # %if.then
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: callq bar1@PLT
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_3: # %if.then2
+; CHECK-NEXT: callq bar2@PLT
+; CHECK-NEXT: jmp .LBB0_4
+entry:
+ %tobool.not = icmp eq ptr %ptr, null
+ br i1 %tobool.not, label %if.then, label %if.else, !prof !5
+
+if.then: ; preds = %entry
+ %call1 = call i64 @bar1()
+ br label %if.end
+
+if.else:
+ %cond = icmp eq i64 %p2, 0
+ br i1 %cond, label %if.then2, label %if.end, !prof !5
+
+if.then2:
+ %call2 = call i64 @bar2()
+ br label %if.end
+
+if.end:
+ %call = phi i64 [ %call1, %if.then ], [%call2, %if.then2], [ %p3, %if.else ]
+ %add1 = add i64 %call, %p2
+ %add2 = add i64 %add1, %p3
+ %add3 = add i64 %add2, %p4
+ %add4 = add i64 %add3, %p5
+ %res = add i64 %add4, %p6
+ ret i64 %res
+}
+
+!5 = !{!"branch_weights", i32 1, i32 2000}
+
+declare i64 @bar1()
+declare i64 @bar2()
diff --git a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
index de1d822a8c8015f..9699c102c0b76b8 100644
--- a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
+++ b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
@@ -19,8 +19,8 @@ target triple = "thumbv7k-apple-watchos2.0.0"
; Function Attrs: optsize ssp
define i64 @_Z3foox(i64 returned) local_unnamed_addr #0 !dbg !13 {
tail call void @llvm.dbg.value(metadata i64 %0, metadata !17, metadata !DIExpression()), !dbg !18
- ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r0
- ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r1
+ ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r5
+ ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r4
%2 = load i64, ptr @g, align 8, !dbg !19, !tbaa !21
%3 = icmp eq i64 %2, %0, !dbg !19
More information about the llvm-commits
mailing list