[llvm] 755f91f - [Statepoint] Enable cross block relocates w/vreg lowering

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 29 13:32:58 PDT 2020


Author: Philip Reames
Date: 2020-07-29T13:32:51-07:00
New Revision: 755f91f12cf08da9c6553de901135945f2c6083a

URL: https://github.com/llvm/llvm-project/commit/755f91f12cf08da9c6553de901135945f2c6083a
DIFF: https://github.com/llvm/llvm-project/commit/755f91f12cf08da9c6553de901135945f2c6083a.diff

LOG: [Statepoint] Enable cross block relocates w/vreg lowering

This change is mechanical, it just removes the restriction and updates tests.  The key building blocks were submitted in 31342eb and 8fe2abc.

Note that this (and preceding changes) entirely subsumes D83965.  I did include a couple of its tests.

From the codegen changes, an interesting observation: this doesn't actually reduce spilling, it just lets the register allocator do its job.  That results in a slightly different overall result which has both pros and cons over the eager spill lowering.  (i.e. We'll have some perf tuning to do once this is stable.)

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
    llvm/test/CodeGen/X86/statepoint-vreg-details.ll
    llvm/test/CodeGen/X86/statepoint-vreg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 044376c66670..f5d29c11790c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -545,16 +545,6 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
   const unsigned MaxTiedRegs = 15; // Max  number of tied regs MI can have.
   unsigned MaxVRegPtrs =
       std::min(MaxTiedRegs, MaxRegistersForGCPointers.getValue());
-  // Use old spill scheme for cross-block relocates.
-  if (SI.StatepointInstr) {
-    const BasicBlock *BB = SI.StatepointInstr->getParent();
-    bool NonLocalReloc =
-        llvm::any_of(SI.GCRelocates, [BB](const GCRelocateInst *R) {
-          return R->getParent() != BB;
-        });
-    if (NonLocalReloc)
-      MaxVRegPtrs = 0;
-  }
 
   LLVM_DEBUG(dbgs() << "Desiding how to lower GC Pointers:\n");
   unsigned CurNumVRegs = 0;

diff  --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
index bfd08f983103..16939d770fed 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll
@@ -215,27 +215,25 @@ define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint
 ; CHECK-VREG:  bb.0.entry:
 ; CHECK-VREG:         %1:gr32 = COPY $esi
 ; CHECK-VREG-NEXT:    %0:gr64 = COPY $rdi
-; CHECK-VREG-NEXT:    %3:gr8 = COPY %1.sub_8bit
-; CHECK-VREG-NEXT:    MOV64mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store 8 into %stack.0)
+; CHECK-VREG-NEXT:    %4:gr8 = COPY %1.sub_8bit
 ; CHECK-VREG-NEXT:    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-; CHECK-VREG-NEXT:    STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al :: (volatile load store 8 on %stack.0)
+; CHECK-VREG-NEXT:    %2:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, %0, %0(tied-def 0), csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al
 ; CHECK-VREG-NEXT:    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-; CHECK-VREG-NEXT:    %4:gr8 = COPY $al
-; CHECK-VREG-NEXT:    %2:gr8 = COPY %4
-; CHECK-VREG-NEXT:    TEST8ri killed %3, 1, implicit-def $eflags
+; CHECK-VREG-NEXT:    %5:gr8 = COPY $al
+; CHECK-VREG-NEXT:    %3:gr8 = COPY %5
+; CHECK-VREG-NEXT:    TEST8ri killed %4, 1, implicit-def $eflags
 ; CHECK-VREG-NEXT:    JCC_1 %bb.2, 4, implicit $eflags
 ; CHECK-VREG-NEXT:    JMP_1 %bb.1
 ; CHECK-VREG:       bb.1.left:
-; CHECK-VREG-NEXT:    %6:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
 ; CHECK-VREG-NEXT:    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-; CHECK-VREG-NEXT:    $rdi = COPY %6
+; CHECK-VREG-NEXT:    $rdi = COPY %2
 ; CHECK-VREG-NEXT:    CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp
 ; CHECK-VREG-NEXT:    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
-; CHECK-VREG-NEXT:    $al = COPY %2
+; CHECK-VREG-NEXT:    $al = COPY %3
 ; CHECK-VREG-NEXT:    RET 0, $al
 ; CHECK-VREG:       bb.2.right:
-; CHECK-VREG-NEXT:    %5:gr8 = MOV8ri 1
-; CHECK-VREG-NEXT:    $al = COPY %5
+; CHECK-VREG-NEXT:    %6:gr8 = MOV8ri 1
+; CHECK-VREG-NEXT:    $al = COPY %6
 ; CHECK-VREG-NEXT:    RET 0, $al
 
 entry:

diff  --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll
index 8c69af77c2f2..6e6240d737ec 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll
@@ -6,10 +6,12 @@ target triple = "x86_64-pc-linux-gnu"
 
 declare i1 @return_i1()
 declare void @func()
+declare void @"some_call"(i64 addrspace(1)*)
 declare void @consume(i32 addrspace(1)*)
 declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*)
 declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)
 declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*)
+declare i32 @"personality_function"()
 
 ; test most simple relocate
 define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
@@ -231,30 +233,31 @@ define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    pushq %r14
 ; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    .cfi_offset %rbx, -24
+; CHECK-NEXT:    .cfi_offset %rbx, -32
+; CHECK-NEXT:    .cfi_offset %r14, -24
 ; CHECK-NEXT:    .cfi_offset %rbp, -16
 ; CHECK-NEXT:    movl %esi, %ebp
-; CHECK-NEXT:    movq %rdi, (%rsp)
+; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    callq return_i1
 ; CHECK-NEXT:  .Ltmp7:
 ; CHECK-NEXT:    testb $1, %bpl
 ; CHECK-NEXT:    je .LBB7_2
 ; CHECK-NEXT:  # %bb.1: # %left
-; CHECK-NEXT:    movl %eax, %ebx
-; CHECK-NEXT:    movq (%rsp), %rdi
+; CHECK-NEXT:    movl %eax, %r14d
+; CHECK-NEXT:    movq %rbx, %rdi
 ; CHECK-NEXT:    callq consume
-; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    movl %r14d, %eax
 ; CHECK-NEXT:    jmp .LBB7_3
 ; CHECK-NEXT:  .LBB7_2: # %right
 ; CHECK-NEXT:    movb $1, %al
 ; CHECK-NEXT:  .LBB7_3: # %right
-; CHECK-NEXT:    addq $8, %rsp
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
 ; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r14
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
@@ -374,9 +377,167 @@ entry:
   ret void
 }
 
+define i64 addrspace(1)* @test_basic_invoke(i64 addrspace(1)* %obj,
+; CHECK-LABEL: test_basic_invoke:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset %rbx, -24
+; CHECK-NEXT:    .cfi_offset %r14, -16
+; CHECK-NEXT:    movq %rsi, %r14
+; CHECK-NEXT:    movq %rdi, %rbx
+; CHECK-NEXT:  .Ltmp12:
+; CHECK-NEXT:    callq some_call
+; CHECK-NEXT:  .Ltmp15:
+; CHECK-NEXT:  .Ltmp13:
+; CHECK-NEXT:  # %bb.1: # %normal_return
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:  .LBB11_2: # %normal_return
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB11_3: # %exceptional_return
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:  .Ltmp14:
+; CHECK-NEXT:    movq %r14, %rax
+; CHECK-NEXT:    jmp .LBB11_2
+                                            i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @"personality_function" {
+entry:
+  %0 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0) ["gc-live" (i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1), "deopt" (i32 0, i32 -1, i32 0, i32 0, i32 0)]
+          to label %invoke_safepoint_normal_dest unwind label %exceptional_return
+
+invoke_safepoint_normal_dest:
+  %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 0, i32 0)
+  %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 1, i32 1)
+  br label %normal_return
+
+normal_return:
+  ret i64 addrspace(1)* %obj.relocated
+
+exceptional_return:
+  %landing_pad = landingpad token
+          cleanup
+  %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0)
+  %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 1, i32 1)
+  ret i64 addrspace(1)* %obj1.relocated1
+}
+
+define i64 addrspace(1)* @test_invoke_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) gc "statepoint-example" personality i32 ()* @"personality_function" {
+; CHECK-LABEL: test_invoke_same_val:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset %rbx, -40
+; CHECK-NEXT:    .cfi_offset %r14, -32
+; CHECK-NEXT:    .cfi_offset %r15, -24
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rdx, %rbx
+; CHECK-NEXT:    movq %rsi, %rbp
+; CHECK-NEXT:    movl %edi, %r14d
+; CHECK-NEXT:    testb $1, %r14b
+; CHECK-NEXT:    je .LBB12_2
+; CHECK-NEXT:  # %bb.1: # %left
+; CHECK-NEXT:  .Ltmp19:
+; CHECK-NEXT:    movq %rbp, %rdi
+; CHECK-NEXT:    callq some_call
+; CHECK-NEXT:  .Ltmp22:
+; CHECK-NEXT:  .Ltmp20:
+; CHECK-NEXT:    jmp .LBB12_4
+; CHECK-NEXT:  .LBB12_2: # %right
+; CHECK-NEXT:    movq %rcx, %r15
+; CHECK-NEXT:  .Ltmp16:
+; CHECK-NEXT:    movq %rbp, %rdi
+; CHECK-NEXT:    callq some_call
+; CHECK-NEXT:  .Ltmp23:
+; CHECK-NEXT:  .Ltmp17:
+; CHECK-NEXT:  # %bb.3: # %right.relocs
+; CHECK-NEXT:    movq %r15, %rbp
+; CHECK-NEXT:  .LBB12_4: # %normal_return
+; CHECK-NEXT:    testb $1, %r14b
+; CHECK-NEXT:    cmoveq %rbx, %rbp
+; CHECK-NEXT:  .LBB12_6: # %exceptional_return.left
+; CHECK-NEXT:    movq %rbp, %rax
+; CHECK-NEXT:  .LBB12_7: # %exceptional_return.left
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    popq %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB12_8: # %exceptional_return.right
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:  .Ltmp18:
+; CHECK-NEXT:    movq %rbx, %rax
+; CHECK-NEXT:    jmp .LBB12_7
+; CHECK-NEXT:  .LBB12_5: # %exceptional_return.left
+; CHECK-NEXT:  .Ltmp21:
+; CHECK-NEXT:    jmp .LBB12_6
+entry:
+  br i1 %cond, label %left, label %right
+
+left:
+  %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val1, i64 addrspace(1)* %val2)]
+           to label %left.relocs unwind label %exceptional_return.left
+
+left.relocs:
+  %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 0, i32 0)
+  %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 1, i32 1)
+  br label %normal_return
+
+right:
+  %sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 0) ["gc-live"(i64 addrspace(1)* %val2, i64 addrspace(1)* %val3)]
+           to label %right.relocs unwind label %exceptional_return.right
+
+right.relocs:
+  %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 0, i32 0)
+  %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 1, i32 1)
+  br label %normal_return
+
+normal_return:
+  %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs]
+  %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs]
+  %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2
+  ret i64 addrspace(1)* %ret
+
+exceptional_return.left:
+  %landing_pad = landingpad token
+          cleanup
+  %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 0, i32 0)
+  ret i64 addrspace(1)* %val.relocated2
+
+exceptional_return.right:
+  %landing_pad1 = landingpad token
+          cleanup
+  %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad1, i32 0, i32 0)
+  ret i64 addrspace(1)* %val.relocated3
+}
+
 declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
 declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
 declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
+declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32)
 declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
 declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
 declare i1 @llvm.experimental.gc.result.i1(token)


        


More information about the llvm-commits mailing list