[llvm] b6043f9 - [RA] Disable split around hint register if optimize for size (#68619)

Wed Oct 11 14:57:20 PDT 2023

Author: weiguozhi
Date: 2023-10-11T14:57:15-07:00
New Revision: b6043f98673e33f17564dd240be3878c61e22333

URL: https://github.com/llvm/llvm-project/commit/b6043f98673e33f17564dd240be3878c61e22333
DIFF: https://github.com/llvm/llvm-project/commit/b6043f98673e33f17564dd240be3878c61e22333.diff

LOG: [RA] Disable split around hint register if optimize for size (#68619)

Split a virtual register with hint may generate COPY instructions in
multiple cold basic blocks, and increase code size. So disable this
split when the function is optimized for size.

Added: 
    llvm/test/CodeGen/X86/no-split-size.ll

Modified: 
    llvm/lib/CodeGen/RegAllocGreedy.cpp
    llvm/test/CodeGen/ARM/thumb2-size-opt.ll
    llvm/test/DebugInfo/ARM/sdag-split-arg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4d476924a7dbf7b..349d8b0975f3a10 100644

--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1217,6 +1217,12 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                      const LiveInterval &VirtReg,
                                      SmallVectorImpl<Register> &NewVRegs,
                                      AllocationOrder &Order) {
+  // Split the VirtReg may generate COPY instructions in multiple cold basic
+  // blocks, and increase code size. So we avoid it when the function is
+  // optimized for size.
+  if (MF->getFunction().hasOptSize())
+    return false;
+
   // Don't allow repeated splitting as a safe guard against looping.
   if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
     return false;

diff  --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
index 8cf7a702e8ed54d..f9f29fc064a20ce 100644
--- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
+++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll
@@ -85,8 +85,8 @@ entry:
 
 define i32 @bundled_instruction(ptr %addr, ptr %addr2, i1 %tst) minsize {
 ; CHECK-LABEL: bundled_instruction:
-; CHECK: iteee ne
-; CHECK: ldmeq r2!, {{{r[0-9]+}}}
+; CHECK: itee ne
+; CHECK: ldmeq r3!, {{{r[0-9]+}}}
   br i1 %tst, label %true, label %false
 
 true:

diff  --git a/llvm/test/CodeGen/X86/no-split-size.ll b/llvm/test/CodeGen/X86/no-split-size.ll
new file mode 100644
index 000000000000000..c1f93acd77dee27
--- /dev/null
+++ b/llvm/test/CodeGen/X86/no-split-size.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+; @foo is optimized for size. Variables %p2, %p3, %p4, %p5 and %p6 are not split
+; in cold blocks.
+
+define i64 @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) optsize {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset %rbx, -48
+; CHECK-NEXT:    .cfi_offset %r12, -40
+; CHECK-NEXT:    .cfi_offset %r13, -32
+; CHECK-NEXT:    .cfi_offset %r14, -24
+; CHECK-NEXT:    .cfi_offset %r15, -16
+; CHECK-NEXT:    movq %r9, %r14
+; CHECK-NEXT:    movq %r8, %rbx
+; CHECK-NEXT:    movq %rcx, %r12
+; CHECK-NEXT:    movq %rdx, %r15
+; CHECK-NEXT:    movq %rsi, %r13
+; CHECK-NEXT:    testq %rdi, %rdi
+; CHECK-NEXT:    je .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %if.else
+; CHECK-NEXT:    testq %r13, %r13
+; CHECK-NEXT:    movq %r15, %rax
+; CHECK-NEXT:    je .LBB0_3
+; CHECK-NEXT:  .LBB0_4: # %if.end
+; CHECK-NEXT:    addq %r13, %rax
+; CHECK-NEXT:    addq %r12, %r15
+; CHECK-NEXT:    addq %rax, %r15
+; CHECK-NEXT:    addq %r14, %rbx
+; CHECK-NEXT:    addq %r15, %rbx
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    popq %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    popq %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_1: # %if.then
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    callq bar1 at PLT
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_3: # %if.then2
+; CHECK-NEXT:    callq bar2 at PLT
+; CHECK-NEXT:    jmp .LBB0_4
+entry:
+  %tobool.not = icmp eq ptr %ptr, null
+  br i1 %tobool.not, label %if.then, label %if.else, !prof !5
+
+if.then:                                          ; preds = %entry
+  %call1 = call i64 @bar1()
+  br label %if.end
+
+if.else:
+  %cond = icmp eq i64 %p2, 0
+  br i1 %cond, label %if.then2, label %if.end, !prof !5
+
+if.then2:
+  %call2 = call i64 @bar2()
+  br label %if.end
+
+if.end:
+  %call = phi i64 [ %call1, %if.then ], [%call2, %if.then2], [ %p3, %if.else ]
+  %add1 = add i64 %call, %p2
+  %add2 = add i64 %add1, %p3
+  %add3 = add i64 %add2, %p4
+  %add4 = add i64 %add3, %p5
+  %res = add i64 %add4, %p6
+  ret i64 %res
+}
+
+!5 = !{!"branch_weights", i32 1, i32 2000}
+
+declare i64 @bar1()
+declare i64 @bar2()

diff  --git a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
index de1d822a8c8015f..9699c102c0b76b8 100644
--- a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
+++ b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll
@@ -19,8 +19,8 @@ target triple = "thumbv7k-apple-watchos2.0.0"
 ; Function Attrs: optsize ssp
 define i64 @_Z3foox(i64 returned) local_unnamed_addr #0 !dbg !13 {
   tail call void @llvm.dbg.value(metadata i64 %0, metadata !17, metadata !DIExpression()), !dbg !18
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r0
-  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r1
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r5
+  ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r4
 
   %2 = load i64, ptr @g, align 8, !dbg !19, !tbaa !21
   %3 = icmp eq i64 %2, %0, !dbg !19