[llvm-commits] [llvm] r165072 - in /llvm/trunk: lib/Target/X86/X86ISelDAGToDAG.cpp test/CodeGen/X86/2012-10-02-DAGCycle.ll

Thu Oct 4 18:59:33 PDT 2012

See r165287.
On Oct 4, 2012, at 5:09 PM, Evan Cheng <evan.cheng at apple.com> wrote:

> Looking.
> 
> Evan
> 
> On Oct 4, 2012, at 11:37 AM, Benjamin Kramer <benny.kra at gmail.com> wrote:
> 
>> 
>> On 03.10.2012, at 01:49, Evan Cheng <evan.cheng at apple.com> wrote:
>> 
>>> Author: evancheng
>>> Date: Tue Oct  2 18:49:13 2012
>>> New Revision: 165072
>>> 
>>> URL: http://llvm.org/viewvc/llvm-project?rev=165072&view=rev
>>> Log:
>>> Fix a serious X86 instruction selection bug. In
>>> X86DAGToDAGISel::PreprocessISelDAG(), isel moves a load inside
>>> callseq_start / callseq_end so that it can be folded into a call. This can
>>> create a cycle in the DAG when the call is glued to a copytoreg. We
>>> have been lucky that this hasn't caused too many issues, because the pre-ra
>>> scheduler has special handling of call sequences. However, it has
>>> caused a crash in a specific tailcall case.
>> 
>> This commit breaks compiling bullet from the test-suite:
>> 
>> $ cd test-suite/MultiSource/Benchmarks/Bullet
>> $ clang++ -Iinclude -DNO_TIME -c btConeShape.cpp -O3 -o /dev/null
>> 
>> # Machine code for function _ZNK11btConeShape24localGetSupportingVertexERK9btVector3: Post SSA
>> Constant Pool:
>> cp#0: float -1.000000e+00, align=4
>> cp#1: float 0x3D10000000000000, align=4
>> cp#2: float 1.000000e+00, align=4
>> Function Live Ins: %RDI in %vreg6, %RSI in %vreg7
>> Function Live Outs: %XMM0 %XMM1
>> 
>> BB#0: derived from LLVM BB %entry
>>   Live Ins: %RDI %RSI
>> 	%vreg7<def> = COPY %RSI<kill>; GR64:%vreg7
>> 	%vreg6<def> = COPY %RDI<kill>; GR64:%vreg6
>> 	ADJCALLSTACKDOWN64 0, %RSP<imp-def>, %EFLAGS<imp-def,dead>, %RSP<imp-use>
>> 	%RDI<def> = COPY %vreg6; GR64:%vreg6
>> 	%RSI<def> = COPY %vreg7; GR64:%vreg7
>> 	CALL64pcrel32 <ga:@_ZNK11btConeShape16coneLocalSupportERK9btVector3>, <regmask>, %RSP<imp-use>, %RDI<imp-use>, %RSI<imp-use,kill>, %RSP<imp-def>, %XMM0<imp-def>, %XMM1<imp-def>
>> 	ADJCALLSTACKUP64 0, 0, %RSP<imp-def>, %EFLAGS<imp-def,dead>, %RSP<imp-use>
>> 	%vreg8<def> = COPY %XMM0<kill>; VR128:%vreg8
>> 	%vreg9<def> = COPY %XMM1<kill>; VR128:%vreg9
>> 	%vreg10<def> = MOV64rm %vreg6, 1, %noreg, 0, %noreg; mem:LD8[%3](tbaa=!"vtable pointer") GR64:%vreg10,%vreg6
>> 	ADJCALLSTACKDOWN64 0, %RSP<imp-def>, %EFLAGS<imp-def,dead>, %RSP<imp-use>
>> 	%RDI<def> = COPY %vreg6; GR64:%vreg6
>> 	CALL64m %vreg10<kill>, 1, %noreg, 88, %noreg, <regmask>, %RSP<imp-use>, %RDI<imp-use>, %RSP<imp-def>, %XMM0<imp-def>; mem:LD8[%vfn] GR64:%vreg10
>> 	ADJCALLSTACKUP64 0, 0, %RSP<imp-def>, %EFLAGS<imp-def,dead>, %RSP<imp-use>
>> 	%vreg11<def> = COPY %XMM0<kill>; FR32:%vreg11
>> 	%vreg1<def> = COPY %vreg9<kill>; VR128:%vreg1,%vreg9
>> 	%vreg0<def> = COPY %vreg8<kill>; VR128:%vreg0,%vreg8
>> 	%vreg12<def> = FsFLD0SS; FR32:%vreg12
>> 	UCOMISSrr %vreg11<kill>, %vreg12<kill>, %EFLAGS<imp-def>; FR32:%vreg11,%vreg12
>> 	JNE_4 <BB#1>, %EFLAGS<imp-use>
>> 	JP_4 <BB#1>, %EFLAGS<imp-use,kill>
>>   Successors according to CFG: BB#1(20) BB#9(12)
>> 
>> BB#9: 
>>   Predecessors according to CFG: BB#0
>> 	%vreg58<def> = COPY %vreg1<kill>; VR128:%vreg58,%vreg1
>> 	%vreg59<def> = COPY %vreg0<kill>; VR128:%vreg59,%vreg0
>> 	JMP_4 <BB#8>
>>   Successors according to CFG: BB#8
>> 
>> BB#1: derived from LLVM BB %if.then
>>   Predecessors according to CFG: BB#0
>> 	%vreg13<def> = MOVSSrm %vreg7, 1, %noreg, 0, %noreg; mem:LD4[%vecnorm.sroa.0.0..idx39] FR32:%vreg13 GR64:%vreg7
>> 	%vreg14<def> = MOVSSrm %vreg7, 1, %noreg, 4, %noreg; mem:LD4[%vecnorm.sroa.1.4..idx26] FR32:%vreg14 GR64:%vreg7
>> 	%vreg15<def> = COPY %vreg14; FR32:%vreg15,%vreg14
>> 	%vreg15<def,tied1> = MULSSrr %vreg15<tied0>, %vreg15; FR32:%vreg15
>> 	%vreg16<def> = COPY %vreg13; FR32:%vreg16,%vreg13
>> 	%vreg16<def,tied1> = MULSSrr %vreg16<tied0>, %vreg16; FR32:%vreg16
>> 	%vreg17<def> = COPY %vreg16<kill>; FR32:%vreg17,%vreg16
>> 	%vreg17<def,tied1> = ADDSSrr %vreg17<tied0>, %vreg15<kill>; FR32:%vreg17,%vreg15
>> 	%vreg18<def> = MOVSSrm %vreg7<kill>, 1, %noreg, 8, %noreg; mem:LD4[%vecnorm.sroa.2.8..idx27] FR32:%vreg18 GR64:%vreg7
>> 	%vreg19<def> = COPY %vreg18; FR32:%vreg19,%vreg18
>> 	%vreg19<def,tied1> = MULSSrr %vreg19<tied0>, %vreg19; FR32:%vreg19
>> 	%vreg20<def> = COPY %vreg19<kill>; FR32:%vreg20,%vreg19
>> 	%vreg20<def,tied1> = ADDSSrr %vreg20<tied0>, %vreg17<kill>; FR32:%vreg20,%vreg17
>> 	%vreg21<def> = MOV64rm %vreg6<kill>, 1, %noreg, 0, %noreg; mem:LD8[%5](tbaa=!"vtable pointer") GR64:%vreg21,%vreg6
>> 	ADJCALLSTACKDOWN64 0, %RSP<imp-def>, %EFLAGS<imp-def,dead>, %RSP<imp-use>
>> 	%vreg22<def> = MOVSSrm %RIP, 1, %noreg, <cp#0>, %noreg; mem:LD4[ConstantPool] FR32:%vreg22
>> 	%vreg23<def> = MOVSSrm %RIP, 1, %noreg, <cp#1>, %noreg; mem:LD4[ConstantPool] FR32:%vreg23
>> 	UCOMISSrr %vreg23<kill>, %vreg20<kill>, %EFLAGS<imp-def>; FR32:%vreg23,%vreg20
>> 	%vreg55<def> = COPY %vreg22; FR32:%vreg55,%vreg22
>> 	JA_4 <BB#3>, %EFLAGS<imp-use>
>>   Successors according to CFG: BB#2 BB#3
>> 
>> BB#2: derived from LLVM BB %if.then
>>   Live Ins: %EFLAGS
>>   Predecessors according to CFG: BB#1
>> 	%vreg55<def> = COPY %vreg14<kill>; FR32:%vreg55,%vreg14
>>   Successors according to CFG: BB#3
>> 
>> BB#3: derived from LLVM BB %if.then
>>   Live Ins: %EFLAGS
>>   Predecessors according to CFG: BB#1 BB#2
>> 	%vreg24<def> = COPY %vreg55<kill>; FR32:%vreg24,%vreg55
>> 	%vreg56<def> = COPY %vreg22; FR32:%vreg56,%vreg22
>> 	JA_4 <BB#5>, %EFLAGS<imp-use>
>>   Successors according to CFG: BB#4 BB#5
>> 
>> BB#4: derived from LLVM BB %if.then
>>   Live Ins: %EFLAGS
>>   Predecessors according to CFG: BB#3
>> 	%vreg56<def> = COPY %vreg18<kill>; FR32:%vreg56,%vreg18
>>   Successors according to CFG: BB#5
>> 
>> BB#5: derived from LLVM BB %if.then
>>   Live Ins: %EFLAGS
>>   Predecessors according to CFG: BB#3 BB#4
>> 	%vreg25<def> = COPY %vreg56<kill>; FR32:%vreg25,%vreg56
>> 	%vreg26<def> = COPY %vreg24; FR32:%vreg26,%vreg24
>> 	%vreg26<def,tied1> = MULSSrr %vreg26<tied0>, %vreg26; FR32:%vreg26
>> 	%vreg57<def> = COPY %vreg22<kill>; FR32:%vreg57,%vreg22
>> 	JA_4 <BB#7>, %EFLAGS<imp-use,kill>
>>   Successors according to CFG: BB#6 BB#7
>> 
>> BB#6: derived from LLVM BB %if.then
>>   Predecessors according to CFG: BB#5
>> 	%vreg57<def> = COPY %vreg13<kill>; FR32:%vreg57,%vreg13
>>   Successors according to CFG: BB#7
>> 
>> BB#7: derived from LLVM BB %if.then
>>   Predecessors according to CFG: BB#5 BB#6
>> 	%vreg27<def> = COPY %vreg57<kill>; FR32:%vreg27,%vreg57
>> 	CALL64m %vreg21<kill>, 1, %noreg, 88, %noreg, <regmask>, %RSP<imp-use>, %RDI<imp-use>, %RSP<imp-def>, %XMM0<imp-def>; mem:LD8[%vfn11] GR64:%vreg21
>> 	ADJCALLSTACKUP64 0, 0, %RSP<imp-def>, %EFLAGS<imp-def,dead>, %RSP<imp-use>
>> 	%vreg28<def> = COPY %XMM0<kill>; FR32:%vreg28
>> 	%vreg29<def> = PSHUFDri %vreg0, 1; VR128:%vreg29,%vreg0
>> 	%vreg30<def> = PSHUFDri %vreg1, 1; VR128:%vreg30,%vreg1
>> 	%vreg31<def> = COPY %vreg30<kill>; VR128:%vreg31,%vreg30
>> 	%vreg35<def> = COPY %vreg27; FR32:%vreg35,%vreg27
>> 	%vreg35<def,tied1> = MULSSrr %vreg35<tied0>, %vreg35; FR32:%vreg35
>> 	%vreg36<def> = COPY %vreg35<kill>; FR32:%vreg36,%vreg35
>> 	%vreg36<def,tied1> = ADDSSrr %vreg36<tied0>, %vreg26<kill>; FR32:%vreg36,%vreg26
>> 	%vreg38<def> = COPY %vreg25; FR32:%vreg38,%vreg25
>> 	%vreg38<def,tied1> = MULSSrr %vreg38<tied0>, %vreg38; FR32:%vreg38
>> 	%vreg39<def> = COPY %vreg38<kill>; FR32:%vreg39,%vreg38
>> 	%vreg39<def,tied1> = ADDSSrr %vreg39<tied0>, %vreg36<kill>; FR32:%vreg39,%vreg36
>> 	%vreg40<def> = SQRTSSr %vreg39<kill>; FR32:%vreg40,%vreg39
>> 	%vreg41<def> = MOVSSrm %RIP, 1, %noreg, <cp#2>, %noreg; mem:LD4[ConstantPool] FR32:%vreg41
>> 	%vreg42<def> = COPY %vreg41<kill>; FR32:%vreg42,%vreg41
>> 	%vreg42<def,tied1> = DIVSSrr %vreg42<tied0>, %vreg40<kill>; FR32:%vreg42,%vreg40
>> 	%vreg43<def> = COPY %vreg24<kill>; FR32:%vreg43,%vreg24
>> 	%vreg43<def,tied1> = MULSSrr %vreg43<tied0>, %vreg42; FR32:%vreg43,%vreg42
>> 	%vreg44<def> = COPY %vreg43<kill>; FR32:%vreg44,%vreg43
>> 	%vreg44<def,tied1> = MULSSrr %vreg44<tied0>, %vreg28; FR32:%vreg44,%vreg28
>> 	%vreg45<def> = COPY %vreg44<kill>; VR128:%vreg45 FR32:%vreg44
>> 	%vreg45<def,tied1> = ADDSSrr %vreg45<tied0>, %vreg29<kill>; VR128:%vreg45,%vreg29
>> 	%vreg47<def> = COPY %vreg27<kill>; FR32:%vreg47,%vreg27
>> 	%vreg47<def,tied1> = MULSSrr %vreg47<tied0>, %vreg42; FR32:%vreg47,%vreg42
>> 	%vreg48<def> = COPY %vreg47<kill>; FR32:%vreg48,%vreg47
>> 	%vreg48<def,tied1> = MULSSrr %vreg48<tied0>, %vreg28; FR32:%vreg48,%vreg28
>> 	%vreg49<def> = COPY %vreg48<kill>; VR128:%vreg49 FR32:%vreg48
>> 	%vreg49<def,tied1> = ADDSSrr %vreg49<tied0>, %vreg0<kill>; VR128:%vreg49,%vreg0
>> 	%vreg2<def> = COPY %vreg49<kill>; VR128:%vreg2,%vreg49
>> 	%vreg2<def,tied1> = UNPCKLPSrr %vreg2<tied0>, %vreg45<kill>; VR128:%vreg2,%vreg45
>> 	%vreg51<def> = COPY %vreg42<kill>; FR32:%vreg51,%vreg42
>> 	%vreg51<def,tied1> = MULSSrr %vreg51<tied0>, %vreg25<kill>; FR32:%vreg51,%vreg25
>> 	%vreg52<def> = COPY %vreg51<kill>; FR32:%vreg52,%vreg51
>> 	%vreg52<def,tied1> = MULSSrr %vreg52<tied0>, %vreg28<kill>; FR32:%vreg52,%vreg28
>> 	%vreg53<def> = COPY %vreg52<kill>; VR128:%vreg53 FR32:%vreg52
>> 	%vreg53<def,tied1> = ADDSSrr %vreg53<tied0>, %vreg1<kill>; VR128:%vreg53,%vreg1
>> 	%vreg3<def> = COPY %vreg53<kill>; VR128:%vreg3,%vreg53
>> 	%vreg3<def,tied1> = UNPCKLPSrr %vreg3<tied0>, %vreg31<kill>; VR128:%vreg3,%vreg31
>> 	%vreg58<def> = COPY %vreg3<kill>; VR128:%vreg58,%vreg3
>> 	%vreg59<def> = COPY %vreg2<kill>; VR128:%vreg59,%vreg2
>>   Successors according to CFG: BB#8
>> 
>> BB#8: derived from LLVM BB %if.end17
>>   Predecessors according to CFG: BB#7 BB#9
>> 	%vreg4<def> = COPY %vreg58<kill>; VR128:%vreg4,%vreg58
>> 	%vreg5<def> = COPY %vreg59<kill>; VR128:%vreg5,%vreg59
>> 	%XMM0<def> = COPY %vreg5<kill>; VR128:%vreg5
>> 	%XMM1<def> = COPY %vreg4<kill>; VR128:%vreg4
>> 	RET %XMM0<imp-use,kill>, %XMM1<imp-use,kill>
>> 
>> # End machine code for function _ZNK11btConeShape24localGetSupportingVertexERK9btVector3.
>> 
>> *** Bad machine code: Using an undefined physical register ***
>> - function:    _ZNK11btConeShape24localGetSupportingVertexERK9btVector3
>> - basic block: BB#7 if.then (0x7fc829b6b648)
>> - instruction: CALL64m %vreg21<kill>, 1, %noreg, 88, %noreg, <regmask>, %RSP<imp-use>, %RDI<imp-use>, %RSP<imp-def>, %XMM0<imp-def>; mem:LD8[%vfn11] GR64:%vreg21
>> - operand 7:   %RDI<imp-use>
>> fatal error: error in backend: Found 1 machine code errors.
>> 
>> 
>>> 
>>> rdar://12393897
>>> 
>>> Added:
>>>  llvm/trunk/test/CodeGen/X86/2012-10-02-DAGCycle.ll
>>> Modified:
>>>  llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
>>> 
>>> Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=165072&r1=165071&r2=165072&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
>>> +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Tue Oct  2 18:49:13 2012
>>> @@ -362,7 +362,7 @@
>>> /// MoveBelowCallOrigChain - Replace the original chain operand of the call with
>>> /// load's chain operand and move load below the call's chain operand.
>>> static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
>>> -                                  SDValue Call, SDValue OrigChain) {
>>> +                               SDValue Call, SDValue OrigChain) {
>>> SmallVector<SDValue, 8> Ops;
>>> SDValue Chain = OrigChain.getOperand(0);
>>> if (Chain.getNode() == Load.getNode())
>>> @@ -386,11 +386,22 @@
>>> CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
>>> CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
>>>                            Load.getOperand(1), Load.getOperand(2));
>>> +
>>> +  bool IsGlued = Call.getOperand(0).getNode()->getGluedUser() == Call.getNode();
>>> +  unsigned NumOps = Call.getNode()->getNumOperands();
>>> Ops.clear();
>>> Ops.push_back(SDValue(Load.getNode(), 1));
>>> -  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
>>> +  for (unsigned i = 1, e = NumOps; i != e; ++i)
>>>   Ops.push_back(Call.getOperand(i));
>>> -  CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
>>> +  if (!IsGlued)
>>> +    CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
>>> +  else
>>> +    // If the call's chain was glued to the call (tailcall) and the load
>>> +    // is now moved between them, remove the glue to avoid a cycle (where
>>> +    // the call is glued to its old chain and the load is using the old
>>> +    // chain as its new chain).
>>> +    CurDAG->MorphNodeTo(Call.getNode(), Call.getOpcode(),
>>> +                        Call.getNode()->getVTList(), &Ops[0], NumOps-1);
>>> }
>>> 
>>> /// isCalleeLoad - Return true if call address is a load and it can be
>>> 
>>> Added: llvm/trunk/test/CodeGen/X86/2012-10-02-DAGCycle.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2012-10-02-DAGCycle.ll?rev=165072&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/X86/2012-10-02-DAGCycle.ll (added)
>>> +++ llvm/trunk/test/CodeGen/X86/2012-10-02-DAGCycle.ll Tue Oct  2 18:49:13 2012
>>> @@ -0,0 +1,16 @@
>>> +; RUN: llc -mtriple=i386-apple-macosx -relocation-model=pic < %s
>>> +; rdar://12393897
>>> +
>>> +%TRp = type { i32, %TRH*, i32, i32 }
>>> +%TRH = type { i8*, i8*, i8*, i8*, {}* }
>>> +
>>> +define i32 @t(%TRp* inreg %rp) nounwind optsize ssp {
>>> +entry:
>>> +  %handler = getelementptr inbounds %TRp* %rp, i32 0, i32 1
>>> +  %0 = load %TRH** %handler, align 4
>>> +  %sync = getelementptr inbounds %TRH* %0, i32 0, i32 4
>>> +  %sync12 = load {}** %sync, align 4
>>> +  %1 = bitcast {}* %sync12 to i32 (%TRp*)*
>>> +  %call = tail call i32 %1(%TRp* inreg %rp) nounwind optsize
>>> +  ret i32 %call
>>> +}
>>> 
>>> 
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits