[llvm] 4358fa7 - [Statepoints] Update DAG root after emitting statepoint.
Denis Antrushin via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 9 06:22:27 PDT 2020
Author: Denis Antrushin
Date: 2020-09-09T20:22:10+07:00
New Revision: 4358fa782e3def5176f6e70c72de8e65702aeb0f
URL: https://github.com/llvm/llvm-project/commit/4358fa782e3def5176f6e70c72de8e65702aeb0f
DIFF: https://github.com/llvm/llvm-project/commit/4358fa782e3def5176f6e70c72de8e65702aeb0f.diff
LOG: [Statepoints] Update DAG root after emitting statepoint.
Since we always generate CopyToRegs for statepoint results,
we must update DAG root after emitting statepoint, so that
these copies are scheduled before any possible local uses.
Note: getControlRoot() flushes all PendingExports, not only
those we generates for relocates. If that'll become a problem,
we can change it to flushing relocate exports only.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D87251
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
llvm/test/CodeGen/X86/statepoint-vreg.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 7cbeb1016c67..83c72ca2da39 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -841,7 +841,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
Register Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy, None);
- SDValue Chain = DAG.getEntryNode();
+ SDValue Chain = DAG.getRoot();
RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr);
PendingExports.push_back(Chain);
@@ -919,8 +919,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Remove original call node
DAG.DeleteNode(CallNode);
- // DON'T set the root - under the assumption that it's already set past the
- // inserted node we created.
+ // Since we always emit CopyToRegs (even for local relocates), we must
+ // update root, so that they are emitted before any local uses.
+ (void)getControlRoot();
// TODO: A better future implementation would be to emit a single variable
// argument, variable return value STATEPOINT node here and then hookup the
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll
index 66b984b90536..6a65abed5754 100644
--- a/llvm/test/CodeGen/X86/statepoint-vreg.ll
+++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll
@@ -8,8 +8,12 @@ declare i1 @return_i1()
declare void @func()
declare void @"some_call"(i64 addrspace(1)*)
declare void @consume(i32 addrspace(1)*)
+declare i32 @consume1(i32) gc "statepoint-example"
declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*)
+declare void @consume3(float) gc "statepoint-example"
+declare float @consume4(i64) gc "statepoint-example"
declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*)
+
declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*)
declare i32 @"personality_function"()
@@ -590,6 +594,90 @@ entry:
ret void
}
+; test multiple statepoints/relocates within single block.
+; relocates must be properly scheduled w.r.t. statepoints
+define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-example" {
+; CHECK-LABEL: test_sched:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsi, %rbx
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: callq consume3
+; CHECK-NEXT: .Ltmp25:
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: cvtsi2sd %ebp, %xmm0
+; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: .Ltmp26:
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss %xmm0, (%rsp)
+; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: .Ltmp27:
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero
+; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss %xmm0, (%rsp)
+; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: .Ltmp28:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: xorpd %xmm0, %xmm0
+; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
+; CHECK-NEXT: # xmm1 = mem[0],zero
+; CHECK-NEXT: ucomisd %xmm0, %xmm1
+; CHECK-NEXT: movabsq $9223372036854775807, %rdi # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: cmovbeq %rax, %rdi
+; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss %xmm0, (%rsp)
+; CHECK-NEXT: nopl 8(%rax,%rax)
+; CHECK-NEXT: .Ltmp29:
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ %token0 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 0, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %2) ]
+ %reloc1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token0, i32 0, i32 0) ; (%2, %2)
+ %tmp1 = sitofp i32 %1 to double
+ %to_max.i29 = fcmp ogt double %tmp1, 0.000000e+00
+ %token1 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %reloc1) ]
+ %reloc2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1)
+ %reloc3 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1)
+ %token2 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc2, i8 addrspace(1)* %reloc3) ]
+ %reloc4 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 0, i32 0) ; (%reloc3, %reloc2)
+ %reloc5 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 1, i32 1) ; (%reloc3, %reloc3)
+ %token3 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 5, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc4, i8 addrspace(1)* %reloc5) ]
+ %reloc6 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token3, i32 1, i32 0) ; (%reloc5, %reloc4)
+ %tmp5 = select i1 %to_max.i29, i64 9223372036854775807, i64 0
+ %token4 = call token (i64, i32, float (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 2, i32 5, float (i64)* nonnull @consume4, i32 1, i32 0, i64 %tmp5, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"() ]
+ret void
+}
+
+declare token @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 immarg, i32 immarg, float (i64)*, i32 immarg, i32 immarg, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 immarg, i32 immarg, i32 (i32)*, i32 immarg, i32 immarg, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 immarg, i32 immarg, void (float)*, i32 immarg, i32 immarg, ...)
declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
More information about the llvm-commits
mailing list