[PATCH] D48102: Improve handling of COPY instructions with identical value numbers
Tim Renouf via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 18 08:29:18 PDT 2018
tpr added a comment.
********** INTERVALS **********
%67 [3184r,3216r:0) 0 at 3184r weight:0.000000e+00
%72 [384B,544r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000004 [384B,544r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000002 [384B,528r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000001 [384B,528r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000008 [2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3184r:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r weight:0.000000e+00
%86 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[2608B,2624r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi L00000004 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[2608B,2624r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi L0000000B [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi weight:0.000000e+00
********** MACHINEINSTRS **********
# Machine code for function _amdgpu_cs_main: NoPHIs, TracksLiveness
2864B bb.66.Flow:
; predecessors: %bb.63, %bb.67
successors: %bb.68(0x80000000); %bb.68(100.00%)
2896B S_BRANCH %bb.68
2912B bb.67 (%ir-block.152):
; predecessors: %bb.64, %bb.65
successors: %bb.66(0x80000000); %bb.66(100.00%)
2928B $exec = S_OR_B64 $exec, %5:sreg_64, implicit-def $scc
2944B %8:vreg_1 = COPY %89:vreg_1
2960B %40:sreg_64_xexec = V_CMP_NE_U32_e64 0, %8:vreg_1, implicit $exec
2976B %39:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %40:sreg_64_xexec, implicit $exec
2992B %82:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
3008B undef %81.sub0:vreg_128 = COPY %82:vgpr_32
3024B %81.sub1:vreg_128 = COPY %82:vgpr_32
3040B %81.sub2:vreg_128 = COPY %39:vgpr_32
3056B %73:vreg_128 = COPY %81:vreg_128
3072B %72:vreg_128 = COPY %73:vreg_128
3088B S_BRANCH %bb.66
3104B bb.68 (%ir-block.156):
; predecessors: %bb.66
successors: %bb.4(0x04000000), %bb.1(0x7c000000); %bb.4(3.12%), %bb.1(96.88%)
3120B %64:vgpr_32 = V_ADD_I32_e32 32, %85:vgpr_32, implicit-def dead $vcc, implicit $exec
3136B V_CMP_EQ_U32_e32 0, %64:vgpr_32, implicit-def $vcc, implicit $exec
3152B %63:vgpr_32 = COPY %64:vgpr_32
3168B $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
3184B %67:vreg_128 = COPY %72:vreg_128
3200B %85:vgpr_32 = COPY %63:vgpr_32
3216B %86:vreg_128 = COPY %67:vreg_128
3232B S_CBRANCH_VCCNZ %bb.4, implicit killed $vcc
3248B S_BRANCH %bb.1
# End machine code for function _amdgpu_cs_main.
2576B %72:vreg_128 = COPY %86:vreg_128
Considering merging to VReg_128 with %72 in %86
RHS = %72 [384B,544r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000004 [384B,544r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000002 [384B,528r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000001 [384B,528r:0)[2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3264B:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r L00000008 [2576r,2608B:2)[2864B,2912B:0)[3072r,3104B:1)[3104B,3184r:0) 0 at 2864B-phi 1 at 3072r 2 at 2576r weight:0.000000e+00
LHS = %86 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[2608B,2624r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi L00000004 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[2608B,2624r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi L0000000B [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi weight:0.000000e+00
merge %86:0 at 3216r into %72:0 at 2864B --> @2864B
merge %72:2 at 2576r into %86:4 at 1920B --> @1920B
LHST = %86 %86 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[2608B,2624r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi L00000004 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[2608B,2624r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi L0000000B [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi weight:0.000000e+00
merge %86:0 at 3216r into %72:0 at 2864B --> @2864B
merge %72:2 at 2576r into %86:4 at 1920B --> @1920B
joined lanes: [224r,240B:1)[240B,384B:2)[384B,544r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2624r:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r
merge %86:0 at 3216r into %72:0 at 2864B --> @2864B
merge %72:2 at 2576r into %86:4 at 1920B --> @1920B
joined lanes: [224r,240B:1)[240B,384B:2)[384B,528r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r
merge %86:0 at 3216r into %72:0 at 2864B --> @2864B
merge %72:2 at 2576r into %86:4 at 1920B --> @1920B
joined lanes: [224r,240B:1)[240B,384B:2)[384B,528r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r
merge %72:2 at 2576r into %86:4 at 1920B --> @1920B
joined lanes: [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:5)[3072r,3104B:6)[3104B,3184r:5)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 2864B-phi 6 at 3072r
Joined SubRanges %86 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2576r:4)[2608B,2624r:4)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi L00000001 [224r,240B:1)[240B,384B:2)[384B,528r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r L00000002 [224r,240B:1)[240B,384B:2)[384B,528r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r L00000004 [224r,240B:1)[240B,384B:2)[384B,544r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2624r:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r L00000008 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:5)[3072r,3104B:6)[3104B,3184r:5)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 2864B-phi 6 at 3072r weight:0.000000e+00
Expecting instruction removal at 3216r
checking: L00000001 [224r,240B:1)[240B,384B:2)[384B,528r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r
checking: L00000002 [224r,240B:1)[240B,384B:2)[384B,528r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r
checking: L00000004 [224r,240B:1)[240B,384B:2)[384B,544r:0)[640B,1808B:2)[1904r,1920B:3)[1920B,2624r:4)[2864B,2912B:0)[3072r,3104B:5)[3104B,3264B:0) 0 at 2864B-phi 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 3072r
checking: L00000008 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,2912B:5)[3072r,3104B:6)[3104B,3184r:5)[3216r,3264B:0) 0 at 3216r 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 2864B-phi 6 at 3072r
Prune sublane 00000008 at 3216r
Assertion failed: (I->end <= std::next(I)->start), function verify, file ../lib/CodeGen/LiveInterval.cpp, line 1022.
(lldb) up 5
frame #5: 0x00000001012cf283 llc`(anonymous namespace)::JoinVals::pruneSubRegValues(this=0x00007fff5fbfa9e8, LI=0x0000000106b0a9e0, ShrinkMask=0x0000000106d00be0) + 1619 at RegisterCoalescer.cpp:2872
2869 }
2870 // Mark value number as unused.
2871 ValueOut->markUnused();
-> 2872 S.verify();
2873 continue;
2874 }
2875 // If a subrange ends at the copy, then a value was copied but only
(lldb) p S.dump()
L00000008 [224r,240B:1)[240B,384B:2)[640B,1808B:2)[1904r,1920B:3)[1920B,2608B:4)[2864B,3264B:5)[3072r,3104B:6)[3104B,3184r:5) 0 at x 1 at 224r 2 at 240B-phi 3 at 1904r 4 at 1920B-phi 5 at 2864B-phi 6 at 3072r
The coalescer was trying to join %72 and %86, and it applied the optimization where it notices that a value of %86 is copied (at 3216) from %67, which is itself copied (at 3184) from %72. %72's value at that point is 5 at 2864B-phi, which reaches 3184 by branching over [2912B,3104B), a range in which another value, 6 at 3072r, is defined.
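In lane L00000008, it wants to remove the segment [3216r,3264B:0) and extend [2864B,2912B:5) to 3264B. However, it ends up with the single segment [2864B,3264B:5), which overlaps the segments that follow it and so trips the assertion in LiveInterval.cpp, whereas it should have [2864B,2912B:5)[3104B,3264B:5).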
Repository:
rL LLVM
https://reviews.llvm.org/D48102
More information about the llvm-commits
mailing list