[llvm] [RA] Disable split around hint register if optimize for size (PR #68619)

Mon Oct 9 15:28:14 PDT 2023

https://github.com/weiguozhi updated https://github.com/llvm/llvm-project/pull/68619

>From 6fe59120e07b7b4d067d51c4c42035135b612d8c Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Mon, 9 Oct 2023 18:44:21 +0000
Subject: [PATCH 1/2] [RA] Disable split around hint register if optimize for
 size

Split a virtual register with hint may generate COPY instructions in
multiple cold basic blocks, and increase code size. So disable this
split when the function is optimized for size.
---
 llvm/lib/CodeGen/RegAllocGreedy.cpp       |  3 +
 llvm/test/CodeGen/ARM/thumb2-size-opt.ll  |  4 +-
 llvm/test/CodeGen/X86/no-split-size.ll    | 92 +++++++++++++++++++++++
 llvm/test/DebugInfo/ARM/sdag-split-arg.ll |  4 +-
 4 files changed, 99 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/no-split-size.ll

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4d476924a7dbf7b..1bfaba00a267fb6 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1217,6 +1217,9 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                      const LiveInterval &VirtReg,
                                      SmallVectorImpl<Register> &NewVRegs,
                                      AllocationOrder &Order) {
+  if (MF->getFunction().hasOptSize())
+    return false;
+
   // Don't allow repeated splitting as a safe guard against looping.
   if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
     return false;
diff --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
index 8cf7a702e8ed54d..f9f29fc064a20ce 100644
--- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
+++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
@@ -85,8 +85,8 @@ entry:
 
 define i32 @bundled_instruction(ptr %addr, ptr %addr2, i1 %tst) minsize {
 ; CHECK-LABEL: bundled_instruction:
-; CHECK: iteee ne
-; CHECK: ldmeq r2!, {{{r[0-9]+}}}
+; CHECK: itee ne
+; CHECK: ldmeq r3!, {{{r[0-9]+}}}
   br i1 %tst, label %true, label %false
 
 true:
diff --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
new file mode 100644
index 000000000000000..305aeea34d8f213
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
+; in cold blocks.
+
+define i64 @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset %rbx, -48
+; CHECK-NEXT:    .cfi_offset %r12, -40
+; CHECK-NEXT:    .cfi_offset %r13, -32
+; CHECK-NEXT:    .cfi_offset %r14, -24
+; CHECK-NEXT:    .cfi_offset %r15, -16
+; CHECK-NEXT:    movq %r9, %r14
+; CHECK-NEXT:    movq %r8, %rbx
+; CHECK-NEXT:    movq %rcx, %r12
+; CHECK-NEXT:    movq %rdx, %r15
+; CHECK-NEXT:    movq %rsi, %r13
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %if.else
+; CHECK-NEXT:    testq %r13, %r13
+; CHECK-NEXT:    movq %r15, %rax
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:  .LBB0_4: # %if.end
+; CHECK-NEXT:    addq %r13, %rax
+; CHECK-NEXT:    addq %r12, %r15
+; CHECK-NEXT:    addq %rax, %r15
+; CHECK-NEXT:    addq %r14, %rbx
+; CHECK-NEXT:    addq %r15, %rbx
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_1: # %if.then
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    callq bar1 at PLT
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_3: # %if.then2
+; CHECK-NEXT:    callq bar2 at PLT
+; CHECK-NEXT:    jmp .LBB0_4
+entry:
+  %tobool.not = icmp eq ptr %ptr, null
+  br i1 %tobool.not, label %if.then, label %if.else, !prof !5
+
+if.then:                                          ; preds = %entry
+  %call1 = call i64 @bar1()
+  br label %if.end
+
+if.else:
+  %cond = icmp eq i64 %p2, 0
+  br i1 %cond, label %if.then2, label %if.end, !prof !5
+
+if.then2:
+  %call2 = call i64 @bar2()
+  br label %if.end
+
+if.end:
+  %call = phi i64 [ %call1, %if.then ], [%call2, %if.then2], [ %p3, %if.else ]
+  %add1 = add i64 %call, %p2
+  %add2 = add i64 %add1, %p3
+  %add3 = add i64 %add2, %p4
+  %add4 = add i64 %add3, %p5
+  %res = add i64 %add4, %p6
+  ret i64 %res
+}
+
+attributes #0 = { optsize }
+
+!5 = !{!"branch_weights", i32 1, i32 2000}
+
+declare i64 @bar1()
+declare i64 @bar2()
diff --git a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
index de1d822a8c8015f..9699c102c0b76b8 100644
--- a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
+++ b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
@@ -19,8 +19,8 @@ target triple = "thumbv7k-apple-watchos2.0.0"
 ; Function Attrs: optsize ssp
 define i64 @_Z3foox(i64 returned) local_unnamed_addr #0 !dbg !13 {
   tail call void @llvm.dbg.value(metadata i64 %0, metadata !17, metadata !DIExpression()), !dbg !18
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r0
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r1
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r5
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r4
 
   %2 = load i64, ptr @g, align 8, !dbg !19, !tbaa !21
   %3 = icmp eq i64 %2, %0, !dbg !19

>From 0ad54faebf26fc41940856304da61158bfc5f556 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot at google.com>
Date: Mon, 9 Oct 2023 22:27:27 +0000
Subject: [PATCH 2/2] [RA] Disable split around hint register if optimize for
 size

Split a virtual register with hint may generate COPY instructions in
multiple cold basic blocks, and increase code size. So disable this
split when the function is optimized for size.
---
 llvm/lib/CodeGen/RegAllocGreedy.cpp    | 3 +++
 llvm/test/CodeGen/X86/no-split-size.ll | 4 +---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 1bfaba00a267fb6..349d8b0975f3a10 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1217,6 +1217,9 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                      const LiveInterval &VirtReg,
                                      SmallVectorImpl<Register> &NewVRegs,
                                      AllocationOrder &Order) {
+  // Split the VirtReg may generate COPY instructions in multiple cold basic
+  // blocks, and increase code size. So we avoid it when the function is
+  // optimized for size.
   if (MF->getFunction().hasOptSize())
     return false;
 
diff --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
index 305aeea34d8f213..c1f93acd77dee27 100644
--- a/llvm/test/CodeGen/X86/no-split-size.ll
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -4,7 +4,7 @@
 ; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
 ; in cold blocks.
 
-define i64 @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) #0 {
+define i64 @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) optsize {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %r15
@@ -84,8 +84,6 @@ if.end:
   ret i64 %res
 }
 
-attributes #0 = { optsize }
-
 !5 = !{!"branch_weights", i32 1, i32 2000}
 
 declare i64 @bar1()