[LLVMdev] Non-local DSE optimization

Jakub Staszak kuba at gcc.gnu.org
Wed Jan 20 10:51:37 PST 2010


Hello,

Patch was improved and adjusted to the trunk. I think that CFG Simplification would be useful before DSE. In other case some modification of "MergeBlockIntoPredecessor" would be needed (see TODO note). Please note also, that patch is disabled by default. Tests on test-suite haven't shown any failures.

Regards
-Jakub
-------------- next part --------------
A non-text attachment was scrubbed...
Name: dse_ssu-4.patch
Type: application/octet-stream
Size: 17858 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20100120/f11caf20/attachment.obj>
-------------- next part --------------

 
On Sep 8, 2009, at 2:26 PM, Jakub Staszak wrote:

> Hello,
> 
> Bug is already fixed by Chris (see: http://llvm.org/bugs/show_bug.cgi?id=4915).
> 
> I added getRootNode() == NULL condition to my patch. It's not a great solution, but it is enough for now I think. New patch attached.
> 
> -Jakub<dse_ssu-2.patch>
> On Sep 6, 2009, at 6:09 AM, Nick Lewycky wrote:
> 
>> Jakub Staszak wrote:
>>> Hi,
>>> It looks like PDT.getRootNode() returns NULL for:
>>> define fastcc void @c974001__lengthy_calculation. 1736(%struct.FRAME.c974001* nocapture %CHAIN.185) noreturn {
>>> entry:
>>>  br label %bb
>>> bb:
>>>  br label %bb
>>> }
>>> Isn't it a bug in PostDominatorTree?
>>> Please note that this crashes:
>>> opt -postdomtree -debug dom_crash.bc
>>> I think this should be reported as a bug,
>> 
>> Yes, that's a bug. Please file it.
>> 
>> The PDT root calculation is looking for all BBs with no successors, this won't work in the face of loops. Either we need to teach PDT users that there can be zero roots, or we need to synthesize a fake root.
>> 
>> The latter is already supported (to handle multiple exits) so that's probably the easiest fix.
>> 
>> Nick
>> 
>>> -Jakub
>>> On Sep 3, 2009, at 7:05 AM, Duncan Sands wrote:
>>>> Hi Jakub, interesting patch.  I ran it over the Ada testsuite and this
>>>> picked up some problems even without enabling dse-ssu.  For example,
>>>> "opt -inline -dse -domtree" crashes on the attached testcase.
>>>> 
>>>> Ciao,
>>>> 
>>>> Duncan.
>>>> ; ModuleID = 'dom_crash.bc'
>>>> target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32- i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64- f80:32:32"
>>>> target triple = "i386-pc-linux-gnu"
>>>> 
>>>> %struct.FRAME.c974001 = type { i32, i8, void  (%struct.c974001__timed_calculation*)* }
>>>> %struct.FRAME.c974001__timed_calculationB = type  { %struct.FRAME.c974001*, i32 }
>>>> %struct.FRAME.c974001__timed_calculation__calculationA = type  { %struct.system__tasking__async_delays__delay_block }
>>>> %struct.RETURN = type { i32, i32 }
>>>> %struct.ada__exceptions__exception_occurrence = type  { %struct.system__standard_library__exception_data*, i32, [200 x  i8], i8, i8, i32, i32, [50 x i32], i32 }
>>>> %struct.c974001__timed_calculation = type  { %struct.system__tasking__ada_task_control_block* }
>>>> %struct.system__os_interface__pthread_mutex_t = type { i32, i32,  i32, i32, %struct.RETURN }
>>>> %struct.system__soft_links__tsd = type  { %struct.system__stack_checking__stack_info, i32, i32,  %struct.ada__exceptions__exception_occurrence }
>>>> %struct.system__stack_checking__stack_info = type { i32, i32, i32 }
>>>> %struct.system__stack_usage__stack_analyzer = type { [32 x i8], i32,  i32, i32, i32, i32, i32, i32, i8, i32 }
>>>> %struct.system__standard_library__exception_data = type { i8, i8,  i32, i32, %struct.system__standard_library__exception_data*, i32,  void ()* }
>>>> %struct.system__task_primitives__private_data = type { i32, i32, [48  x i8], %struct.system__os_interface__pthread_mutex_t }
>>>> %struct.system__tasking__accept_alternative = type { i8, i32 }
>>>> %struct.system__tasking__accept_list_access = type { [0 x  %struct.system__tasking__accept_alternative]*, %struct.RETURN* }
>>>> %struct.system__tasking__ada_task_control_block = type { i32,  %struct.system__tasking__common_atcb, [19 x  %struct.system__tasking__entry_call_record], i32,  %struct.system__tasking__accept_list_access, i32, i32, i32, i32,  i32, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i64, i32, i32,  [4 x i32], i8, i32*, [0 x %struct.system__tasking__entry_queue] }
>>>> %struct.system__tasking__async_delays__delay_block = type  { %struct.system__tasking__ada_task_control_block*, i32, i64, i8,  %struct.system__tasking__async_delays__delay_block*,  %struct.system__tasking__async_delays__delay_block* }
>>>> %struct.system__tasking__common_atcb = type { i8,  %struct.system__tasking__ada_task_control_block*, i32, i32, i32, [32  x i8], i32, %struct.system__tasking__entry_call_record*,  %struct.system__task_primitives__private_data, i32, void (i32)*,  %struct.system__soft_links__tsd,  %struct.system__tasking__ada_task_control_block*,  %struct.system__tasking__ada_task_control_block*,  %struct.system__tasking__ada_task_control_block*, i32, i8*, i8, i8,  %struct.system__stack_usage__stack_analyzer, i32,  %struct.system__tasking__termination_handler,  %struct.system__tasking__termination_handler }
>>>> %struct.system__tasking__entry_call_record = type  { %struct.system__tasking__ada_task_control_block*, i8, i8, i32,  %struct.system__standard_library__exception_data*,  %struct.system__tasking__entry_call_record*,  %struct.system__tasking__entry_call_record*, i32, i32, i32,  %struct.system__tasking__ada_task_control_block*, i32,  %struct.system__tasking__entry_call_record*, i32, i8, i8, i8 }
>>>> %struct.system__tasking__entry_queue = type  { %struct.system__tasking__entry_call_record*,  %struct.system__tasking__entry_call_record* }
>>>> %struct.system__tasking__termination_handler = type { i32, void  (i32, i8, %struct.system__tasking__ada_task_control_block*,  %struct.ada__exceptions__exception_occurrence*)* }
>>>> 
>>>> @C.168.1967 = external constant %struct.RETURN    ; < %struct.RETURN*> [#uses=1]
>>>> 
>>>> define void  @system__tasking__activation_chainIP (%struct.c974001__timed_calculation* nocapture %_init) nounwind {
>>>> entry:
>>>> ret void
>>>> }
>>>> 
>>>> define void @_ada_c974001() {
>>>> entry:
>>>> %tramp = call i8* @llvm.init.trampoline(i8* undef, i8* bitcast  (void (%struct.FRAME.c974001*, %struct.c974001__timed_calculation*)*  @c974001__timed_calculationB.1770 to i8*), i8* undef) ; <i8*>  [#uses=0]
>>>> unreachable
>>>> }
>>>> 
>>>> declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
>>>> 
>>>> define fastcc void @c974001__lengthy_calculation. 1736(%struct.FRAME.c974001* nocapture %CHAIN.185) noreturn {
>>>> entry:
>>>> br label %bb
>>>> 
>>>> bb:                                               ; preds = %bb,  %entry
>>>> br label %bb
>>>> }
>>>> 
>>>> define fastcc void  @c974001__timed_calculation__calculation__B19b__B21b__A17b___clean. 1830(%struct.FRAME.c974001__timed_calculation__calculationA* %CHAIN. 188) {
>>>> entry:
>>>> ret void
>>>> }
>>>> 
>>>> define fastcc void @c974001__timed_calculation__calculationA. 1820(%struct.FRAME.c974001__timed_calculationB* nocapture %CHAIN. 190) {
>>>> entry:
>>>> br i1 undef, label %bb, label %bb3
>>>> 
>>>> bb:                                               ; preds = %entry
>>>> unreachable
>>>> 
>>>> bb3:                                              ; preds = %entry
>>>> br i1 undef, label %bb4, label %bb5
>>>> 
>>>> bb4:                                              ; preds = %bb3
>>>> unreachable
>>>> 
>>>> bb5:                                              ; preds = %bb3
>>>> invoke void undef()
>>>>        to label %invcont unwind label %lpad
>>>> 
>>>> invcont:                                          ; preds = %bb5
>>>> %0 = invoke i8 @system__tasking__async_delays__enqueue_duration(i64  undef, %struct.system__tasking__async_delays__delay_block* undef)
>>>>        to label %bb8 unwind label %lpad        ; <i8> [#uses=0]
>>>> 
>>>> bb8:                                              ; preds = %invcont
>>>> invoke void undef()
>>>>        to label %invcont9 unwind label %lpad75
>>>> 
>>>> invcont9:                                         ; preds = %bb8
>>>> invoke fastcc void @c974001__lengthy_calculation. 1736(%struct.FRAME.c974001* undef)
>>>>        to label %invcont10 unwind label %lpad75
>>>> 
>>>> invcont10:                                        ; preds = %invcont9
>>>> invoke void @report__failed([0 x i8]* undef, %struct.RETURN* @C. 168.1967)
>>>>        to label %bb16 unwind label %lpad75
>>>> 
>>>> bb16:                                             ; preds = %invcont10
>>>> invoke fastcc void  @c974001__timed_calculation__calculation__B19b__B21b__A17b___clean. 1830(%struct.FRAME.c974001__timed_calculation__calculationA* undef)
>>>>        to label %bb27 unwind label %lpad71
>>>> 
>>>> bb27:                                             ; preds = %bb16
>>>> unreachable
>>>> 
>>>> lpad:                                             ; preds =  %invcont, %bb5
>>>> unreachable
>>>> 
>>>> lpad71:                                           ; preds = %bb16
>>>> unreachable
>>>> 
>>>> lpad75:                                           ; preds =  %invcont10, %invcont9, %bb8
>>>> unreachable
>>>> }
>>>> 
>>>> declare i8 @system__tasking__async_delays__enqueue_duration(i64,  %struct.system__tasking__async_delays__delay_block*)
>>>> 
>>>> declare void @report__failed([0 x i8]*, %struct.RETURN*)
>>>> 
>>>> define void @c974001__timed_calculationB.1770(%struct.FRAME.c974001*  nest %CHAIN.191, %struct.c974001__timed_calculation* nocapture  %_task) {
>>>> entry:
>>>> invoke void undef()
>>>>        to label %invcont unwind label %lpad
>>>> 
>>>> invcont:                                          ; preds = %entry
>>>> invoke void @system__tasking__stages__complete_activation()
>>>>        to label %bb unwind label %lpad
>>>> 
>>>> bb:                                               ; preds = %bb5,  %invcont4, %invcont
>>>> invoke void  @system__tasking__rendezvous__selective_wait(%struct.RETURN* noalias  sret undef, [0 x %struct.system__tasking__accept_alternative]*  undef, %struct.RETURN* undef, i8 2)
>>>>        to label %invcont4 unwind label %lpad25
>>>> 
>>>> invcont4:                                         ; preds = %bb
>>>> br i1 undef, label %bb5, label %bb
>>>> 
>>>> bb5:                                              ; preds = %invcont4
>>>> invoke fastcc void @c974001__timed_calculation__calculationA. 1820(%struct.FRAME.c974001__timed_calculationB* undef)
>>>>        to label %bb unwind label %lpad25
>>>> 
>>>> bb7:                                              ; preds = %lpad25
>>>> unreachable
>>>> 
>>>> lpad:                                             ; preds =  %invcont, %entry
>>>> unreachable
>>>> 
>>>> lpad25:                                           ; preds = %bb5, %bb
>>>> br i1 undef, label %bb7, label %ppad
>>>> 
>>>> ppad:                                             ; preds = %lpad25
>>>> unreachable
>>>> }
>>>> 
>>>> declare void @system__tasking__stages__complete_activation()
>>>> 
>>>> declare void  @system__tasking__rendezvous__selective_wait(%struct.RETURN* noalias  sret, [0 x %struct.system__tasking__accept_alternative]*,  %struct.RETURN*, i8)
>>> _______________________________________________
>>> LLVM Developers mailing list
>>> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
> 
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev



More information about the llvm-dev mailing list