[llvm] [RA] Disable split around hint register if optimize for size (PR #68619)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 9 11:52:31 PDT 2023
https://github.com/weiguozhi created https://github.com/llvm/llvm-project/pull/68619
Splitting a virtual register around its hint register may generate COPY instructions in multiple cold basic blocks, which increases code size. This patch therefore disables that split when the function is optimized for size.
>From 6fe59120e07b7b4d067d51c4c42035135b612d8c Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Mon, 9 Oct 2023 18:44:21 +0000
Subject: [PATCH] [RA] Disable split around hint register if optimize for size
Split a virtual register with hint may generate COPY instructions in
multiple cold basic blocks, and increase code size. So disable this
split when the function is optimized for size.
---
llvm/lib/CodeGen/RegAllocGreedy.cpp | 3 +
llvm/test/CodeGen/ARM/thumb2-size-opt.ll | 4 +-
llvm/test/CodeGen/X86/no-split-size.ll | 92 +++++++++++++++++++++++
llvm/test/DebugInfo/ARM/sdag-split-arg.ll | 4 +-
4 files changed, 99 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/no-split-size.ll
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4d476924a7dbf7b..1bfaba00a267fb6 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1217,6 +1217,9 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
const LiveInterval &VirtReg,
SmallVectorImpl<Register> &NewVRegs,
AllocationOrder &Order) {
+ if (MF->getFunction().hasOptSize())
+ return false;
+
// Don't allow repeated splitting as a safe guard against looping.
if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
return false;
diff --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
index 8cf7a702e8ed54d..f9f29fc064a20ce 100644
--- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
+++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
@@ -85,8 +85,8 @@ entry:
define i32 @bundled_instruction(ptr %addr, ptr %addr2, i1 %tst) minsize {
; CHECK-LABEL: bundled_instruction:
-; CHECK: iteee ne
-; CHECK: ldmeq r2!, {{{r[0-9]+}}}
+; CHECK: itee ne
+; CHECK: ldmeq r3!, {{{r[0-9]+}}}
br i1 %tst, label %true, label %false
true:
diff --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
new file mode 100644
index 000000000000000..305aeea34d8f213
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
+; in cold blocks.
+
+define i64 @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) #0 {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset %rbx, -48
+; CHECK-NEXT: .cfi_offset %r12, -40
+; CHECK-NEXT: .cfi_offset %r13, -32
+; CHECK-NEXT: .cfi_offset %r14, -24
+; CHECK-NEXT: .cfi_offset %r15, -16
+; CHECK-NEXT: movq %r9, %r14
+; CHECK-NEXT: movq %r8, %rbx
+; CHECK-NEXT: movq %rcx, %r12
+; CHECK-NEXT: movq %rdx, %r15
+; CHECK-NEXT: movq %rsi, %r13
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # %bb.2: # %if.else
+; CHECK-NEXT: testq %r13, %r13
+; CHECK-NEXT: movq %r15, %rax
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: .LBB0_4: # %if.end
+; CHECK-NEXT: addq %r13, %rax
+; CHECK-NEXT: addq %r12, %r15
+; CHECK-NEXT: addq %rax, %r15
+; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: addq %r15, %rbx
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1: # %if.then
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: callq bar1@PLT
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_3: # %if.then2
+; CHECK-NEXT: callq bar2@PLT
+; CHECK-NEXT: jmp .LBB0_4
+entry:
+ %tobool.not = icmp eq ptr %ptr, null
+ br i1 %tobool.not, label %if.then, label %if.else, !prof !5
+
+if.then: ; preds = %entry
+ %call1 = call i64 @bar1()
+ br label %if.end
+
+if.else:
+ %cond = icmp eq i64 %p2, 0
+ br i1 %cond, label %if.then2, label %if.end, !prof !5
+
+if.then2:
+ %call2 = call i64 @bar2()
+ br label %if.end
+
+if.end:
+ %call = phi i64 [ %call1, %if.then ], [%call2, %if.then2], [ %p3, %if.else ]
+ %add1 = add i64 %call, %p2
+ %add2 = add i64 %add1, %p3
+ %add3 = add i64 %add2, %p4
+ %add4 = add i64 %add3, %p5
+ %res = add i64 %add4, %p6
+ ret i64 %res
+}
+
+attributes #0 = { optsize }
+
+!5 = !{!"branch_weights", i32 1, i32 2000}
+
+declare i64 @bar1()
+declare i64 @bar2()
diff --git a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
index de1d822a8c8015f..9699c102c0b76b8 100644
--- a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
+++ b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
@@ -19,8 +19,8 @@ target triple = "thumbv7k-apple-watchos2.0.0"
; Function Attrs: optsize ssp
define i64 @_Z3foox(i64 returned) local_unnamed_addr #0 !dbg !13 {
tail call void @llvm.dbg.value(metadata i64 %0, metadata !17, metadata !DIExpression()), !dbg !18
- ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r0
- ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r1
+ ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r5
+ ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r4
%2 = load i64, ptr @g, align 8, !dbg !19, !tbaa !21
%3 = icmp eq i64 %2, %0, !dbg !19
More information about the llvm-commits
mailing list