<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <meta content="text/html; charset=ISO-8859-1"
      http-equiv="Content-Type">
  </head>
  <body text="#000000" bgcolor="#ffffff">
    Update: I've found out that the location information is possibly
    incorrect<br>
    if they point to standard C/C++ headers as shown in the following
    listing:<br>
    <tt>--------------------------------------------------------------------------------<br>
      [...]<br>
      /usr/include/c++/4.4/bits/basic_ios.h: 48<br>
            if (!__f)<br>
       ^<br>
        58:  80489e5:    e8 2e fd ff ff           call   8048718
      <_ZSt16__throw_bad_castv@plt><br>
        59:  80489ea:    66 0f 1f 44 00 00        nopw  
      0x0(%eax,%eax,1)<br>
        60:  80489f0:    55                       push   %ebp<br>
        61:  80489f1:    89 e5                    mov    %esp,%ebp<br>
        62:  80489f3:    83 ec 18                 sub    $0x18,%esp<br>
        63:  80489f6:    c7 04 24 94 a1 04 08     movl  
      $0x804a194,(%esp)<br>
        64:  80489fd:    e8 56 fd ff ff           call   8048758
      <_ZNSt8ios_base4InitC1Ev@plt><br>
        65:  8048a02:    c7 44 24 08 44 a0 04     movl  
      $0x804a044,0x8(%esp)<br>
        66:  8048a09:    08 <br>
        67:  8048a0a:    c7 44 24 04 94 a1 04     movl  
      $0x804a194,0x4(%esp)<br>
        68:  8048a11:    08 <br>
        69:  8048a12:    c7 04 24 78 87 04 08     movl  
      $0x8048778,(%esp)<br>
        70:  8048a19:    e8 ea fc ff ff           call   8048708
      <__cxa_atexit@plt><br>
        71:  8048a1e:    83 c4 18                 add    $0x18,%esp<br>
        72:  8048a21:    5d                       pop    %ebp<br>
        73:  8048a22:    c3                       ret    <br>
--------------------------------------------------------------------------------<br>
      /usr/include/c++/4.4/bits/basic_ios.h: 439<br>
            widen(char __c) const<br>
              ^<br>
        74:  8048958:    8b 5c 30 7c              mov   
      0x7c(%eax,%esi,1),%ebx<br>
        75:  804895c:    85 db                    test   %ebx,%ebx<br>
        76:  804895e:    0f 84 81 00 00 00        je     80489e5
      &lt;main+0x135&gt;<br>
--------------------------------------------------------------------------------<br>
      /usr/include/c++/4.4/bits/locale_facets.h: 866<br>
            {<br>
       ^<br>
        77:  8048964:    80 7b 1c 00              cmpb   $0x0,0x1c(%ebx)<br>
        78:  8048968:    74 86                    je     80488f0
      &lt;main+0x40&gt;<br>
--------------------------------------------------------------------------------<br>
      /usr/include/c++/4.4/bits/locale_facets.h: 867<br>
          if (_M_widen_ok)<br>
            ^<br>
        79:  804896a:    8a 43 27                 mov    0x27(%ebx),%al<br>
        80:  804896d:    eb 99                    jmp    8048908
      &lt;main+0x58&gt;<br>
--------------------------------------------------------------------------------<br>
      /usr/include/c++/4.4/bits/locale_facets.h: 868<br>
            return _M_widen[static_cast&lt;unsigned char&gt;(__c)];<br>
          ^<br>
        81:  80488f0:    89 1c 24                 mov    %ebx,(%esp)<br>
        82:  80488f3:    e8 50 fe ff ff           call   8048748
      <_ZNKSt5ctypeIcE13_M_widen_initEv@plt><br>
--------------------------------------------------------------------------------<br>
      /usr/include/c++/4.4/bits/locale_facets.h: 869<br>
          this->_M_widen_init();<br>
          ^<br>
        83:  80488f8:    8b 03                    mov    (%ebx),%eax<br>
        84:  80488fa:    89 1c 24                 mov    %ebx,(%esp)<br>
        85:  80488fd:    c7 44 24 04 0a 00 00     movl   $0xa,0x4(%esp)<br>
        86:  8048904:    00 <br>
        87:  8048905:    ff 50 18                 call   *0x18(%eax)<br>
--------------------------------------------------------------------------------<br>
      [...]<br>
--------------------------------------------------------------------------------<br>
      /usr/include/c++/4.4/ostream: 538<br>
          endl(basic_ostream<_CharT, _Traits>& __os)<br>
            ^<br>
        98:  8048908:    0f be c0                 movsbl %al,%eax<br>
        99:  804890b:    89 44 24 04              mov    %eax,0x4(%esp)<br>
       100:  804890f:    89 34 24                 mov    %esi,(%esp)<br>
       101:  8048912:    e8 c1 fe ff ff           call   80487d8
      <_ZNSo3putEc@plt><br>
       102:  8048953:    8b 06                    mov    (%esi),%eax<br>
       103:  8048955:    8b 40 f4                 mov    -0xc(%eax),%eax<br>
--------------------------------------------------------------------------------<br>
      /usr/include/c++/4.4/ostream: 559<br>
          flush(basic_ostream<_CharT, _Traits>& __os)<br>
            ^<br>
       104:  8048917:    89 04 24                 mov    %eax,(%esp)<br>
       105:  804891a:    e8 79 fe ff ff           call   8048798
      <_ZNSo5flushEv@plt><br>
       106:  804891f:    8b 75 ec                 mov   
      -0x14(%ebp),%esi<br>
       107:  8048922:    47                       inc    %edi<br>
--------------------------------------------------------------------------------</tt><br>
    (The "^" marks the column position within the line.)<br>
    <br>
    I am not completely sure, but the mapping of line 868 in file
    "locale_facets.h" might be wrong: there is a call instruction which
    calls "_M_widen_init", but this function is actually called on the
    next line (869).<br>
    <br>
    Here is the extract from locale_facets.h:<br>
    <tt>      char_type<br>
            widen(char __c) const<br>
            {<br>
          if (_M_widen_ok)<br>
            return _M_widen[static_cast&lt;unsigned char&gt;(__c)];<br>
          this->_M_widen_init();<br>
          return this->do_widen(__c);<br>
            }</tt><br>
    <br>
    In addition, line 48 of "basic_ios.h" contains a ret instruction,
    which should map to a return or throw statement. <span
      class="Apple-style-span" style="border-collapse: separate; color:
      rgb(0, 0, 0); font-family: 'Times New Roman'; font-style: normal;
      font-variant: normal; font-weight: normal; letter-spacing: normal;
      line-height: normal; orphans: 2; text-indent: 0px; text-transform:
      none; white-space: normal; widows: 2; word-spacing: 0px;
      font-size: medium;"><span class="Apple-style-span"
        style="font-family: arial,sans-serif; font-size: 25px;"><span
          class="hps" title="Zur Anzeige alternativer Übersetzungen
          klicken">The</span><span class="Apple-converted-space"> </span><span
          class="hps" title="Zur Anzeige alternativer Übersetzungen
          klicken">column</span><span class="Apple-converted-space"> </span><span
          class="hps" title="Zur Anzeige alternativer Übersetzungen
          klicken">numbers</span><span class="Apple-converted-space"> </span><span
          class="hps" title="Zur Anzeige alternativer Übersetzungen
          klicken">are obviously</span><span
          class="Apple-converted-space"> </span><span class="hps"
          title="Zur Anzeige alternativer Übersetzungen klicken">wrong</span><span
          title="Zur Anzeige alternativer Übersetzungen klicken"
          class="">.</span></span></span><br>
    <br>
    Are these interpretations correct?<br>
    <br>
    Best regards<br>
      Adrian<br>
    <br>
    <br>
    On 31.05.2011 20:17, <a class="moz-txt-link-abbreviated" href="mailto:trash-stuff@gmx.de">trash-stuff@gmx.de</a> wrote:
    <blockquote cite="mid:4DE530BC.8080706@gmx.de" type="cite">
      <meta content="text/html; charset=ISO-8859-1"
        http-equiv="Content-Type">
      On 31.05.2011 19:45, Devang Patel wrote:
      <blockquote
        cite="mid:28E8AD70-77FD-4FB3-A37C-E3A30284047D@apple.com"
        type="cite"><br>
        <div>
          <div>On May 31, 2011, at 10:36 AM, <a moz-do-not-send="true"
              href="mailto:trash-stuff@gmx.de">trash-stuff@gmx.de</a>
            wrote:</div>
          <br class="Apple-interchange-newline">
          <blockquote type="cite"><span class="Apple-style-span"
              style="border-collapse: separate; font-family: Verdana;
              font-style: normal; font-variant: normal; font-weight:
              normal; letter-spacing: normal; line-height: normal;
              orphans: 2; text-indent: 0px; text-transform: none;
              white-space: normal; widows: 2; word-spacing: 0px;
              font-size: medium;">On 31.05.2011 19:22, Devang Patel
              wrote:
              <blockquote
                cite="mid:63165691-E116-4435-9188-7976D35830BB@apple.com"
                type="cite"><br>
                <div>
                  <div>On May 30, 2011, at 11:11 AM,<span
                      class="Apple-converted-space"> </span><a
                      moz-do-not-send="true"
                      href="mailto:trash-stuff@gmx.de">trash-stuff@gmx.de</a><span
                      class="Apple-converted-space"> </span>wrote:</div>
                  <br class="Apple-interchange-newline">
                  <blockquote type="cite"><span class="Apple-style-span"
                      style="border-collapse: separate; font-family:
                      Verdana; font-style: normal; font-variant: normal;
                      font-weight: normal; letter-spacing: normal;
                      line-height: normal; orphans: 2; text-indent: 0px;
                      text-transform: none; white-space: normal; widows:
                      2; word-spacing: 0px; font-size: medium;">Hi all,<br>
                      <br>
                      I am processing DWARF line and column information
                      in (x86 and ARM) executables in order to produce a
                      mapping from the machine instructions back to the
                      original source code (C/C++). Using the line
                      numbers is quite straightforward ("libdwarf" [1]
                      is doing the work for me.) But when comparing the
                      column numbers (extracted from the DWARF line
                      table) with the corresponding source code
                      locations, it becomes clear that they are not very
                      "useful".<br>
                      <br>
                      Consider the following small example (C++):<br>
                      <blockquote><tt> 1: #include &lt;iostream&gt;<br>
                           2: #include &lt;ctime&gt;<br>
                           3: #include &lt;cstdlib&gt;<br>
                           4: using namespace std;<br>
                           5: int main() {<br>
                           6:    int j = 0; cin >> j; long sum =
                          (j < 0 ? -5 : 4) + rand();<br>
                           7:    for(int i = 0; i < j; i++) { sum +=
                          j*j-2; cout << (sum / 2) << endl;
                          }<br>
                           8:    srand(time(NULL));<br>
                           9:    double d = rand() / 10.341; int t =
                          (int)d+j*sum;<br>
                          10:    cout << sum << d << t
                          << j;<br>
                          11:    return (0);<br>
                          12: }</tt><br>
                      </blockquote>
                      Compiling this with "clang++ Main.cpp -g -O3 -o
                      column" results in the following location
                      information within the generated executable:<br>
                      <blockquote><tt>$ dwarfdump -l column<br>
                          <br>
                          .debug_line: line number info for a single cu<br>
                          Source lines (from CU-DIE at .debug_info
                          offset 11):<br>
                            &lt;source file&gt;     [line,column]    
                          &lt;pc&gt;    //&lt;new stmt or basic block<br>
                          .../locale_facets.h:  [868, 2]    0x80488f0 
                          // new statement</tt><br>
                        <tt>               [...]</tt><br>
                        <tt>.../Main.cpp:   </tt><tt>     <span
                            class="Apple-converted-space"> </span></tt><tt>[ 

                          8, 2]    0x804896f  // new statement</tt><br>
                        <tt>.../Main.cpp:   </tt><tt>     <span
                            class="Apple-converted-space"> </span></tt><tt>[ 

                          9,28]    0x8048983  // new statement</tt><br>
                        <tt>.../ostream:  </tt><tt>     <span
                            class="Apple-converted-space"> </span></tt><tt> 
                          [165, 9]    0x8048990  // new statement</tt><br>
                        <tt>.../Main.cpp:  </tt><tt>      </tt><tt><span
                            class="Apple-converted-space"> </span>[ 
                          9,28]    0x80489a0  // new statement</tt><br>
                        <tt>.../ostream:   </tt><tt>      <span
                            class="Apple-converted-space"> </span></tt><tt>[209,

                          9]    0x80489ac  // new statement</tt><br>
                        <tt>.../Main.cpp:   </tt><tt>     <span
                            class="Apple-converted-space"> </span></tt><tt>[ 

                          9,28]    0x80489b5  // new statement</tt><br>
                        <tt>.../ostream:   </tt><tt>      <span
                            class="Apple-converted-space"> </span></tt><tt>[209,

                          9]    0x80489bb  // new statement</tt><br>
                        <tt>               [...]</tt><br>
                        <tt>.../basic_ios.h:      [ 48, 2]    0x8048a23 
                          // new statement // end of text sequence</tt><br>
                      </blockquote>
                      Now, have a look at source code line 9. The
                      extracted debug info above says that we've 3
                      "instruction sets" (beginning at<span
                        class="Apple-converted-space"> </span><tt>0x8048983,<span
                          class="Apple-converted-space"> </span></tt><tt>0x80489a0</tt><span
                        class="Apple-converted-space"> </span>and<span
                        class="Apple-converted-space"> </span><tt>0x80489b5</tt><span
                        class="Apple-converted-space"> </span>respectively)

                      which correspond to line 9. But all of them are
                      labeled with column number 28! According to my
                      understanding, this does not contribute any
                      further information to support my task (= mapping
                      assembler code back to the source lines or even to
                      statements within a line). Did I miss anything?<br>
                    </span></blockquote>
                </div>
                <br>
                <div>You are looking at the line table produced at -O3,
                  i.e. after the aggressive optimizer has had opportunities
                  to optimize the code. Try -O0 and see if it helps.</div>
              </blockquote>
              First of all, thanks for your reply!<br>
              <br>
              I've already checked that at -O0 but it results in the
              same information.</span></blockquote>
          <div><br>
          </div>
          <div>You mean, the instructions with given line and column
            number do not match the source code construct at that
            location ? <br>
          </div>
        </div>
      </blockquote>
      No, they do.<br>
      <blockquote
        cite="mid:28E8AD70-77FD-4FB3-A37C-E3A30284047D@apple.com"
        type="cite">
        <div><br>
          <blockquote type="cite"><span class="Apple-style-span"
              style="border-collapse: separate; font-family: Verdana;
              font-style: normal; font-variant: normal; font-weight:
              normal; letter-spacing: normal; line-height: normal;
              orphans: 2; text-indent: 0px; text-transform: none;
              white-space: normal; widows: 2; word-spacing: 0px;
              font-size: medium;"> (The documentation about "Source
              Level Debugging with LLVM" says "<b>LLVM debug information
                always provides information to accurately read the
                source-level state of the program, regardless of which
                LLVM optimizations have been run</b>, and without any
              modification to the optimizations themselves." [1])<br>
            </span></blockquote>
          <br>
        </div>
        <div>It means the instructions with a given line and column number
          match the source code construct at that line/col number. It
          does not mean that the optimizer/code generator will not reorder
          instructions. It also does not mean that the optimizer/code
          generator will not emit instructions without line number
          information. It means that, if there is line number information,
          it is as accurate as possible in mapping to the source construct.</div>
      </blockquote>
      Yes, that matches my understanding, too. But I thought that clang
      would be able to emit <b>more</b> than one (different) column
      number per line. As in my example, for line number 9 (in
      Main.cpp), there are <b>three</b> entries in the DWARF line
      table. But all of them contain the <b>same</b> information. As a
      consequence, the associated assembler instructions were all mapped
      to the same source line and thus, the column information is
      useless...? I mean, what additional information is included
      in the column numbers?<br>
      <br>
      I extracted the assembler instructions for the 9th line (x86):<br>
      <tt>.../Main.cpp: 9<br>
            double d = rand() / 10.341; int t = (int)d+j*sum;<br>
                                      ^<br>
        8048983:    e8 40 fe ff ff           call   80487c8
        &lt;rand@plt&gt;<br>
        8048988:    89 c7                    mov    %eax,%edi<br>
        804898a:    8b 5d f0                 mov    -0x10(%ebp),%ebx<br>
        804898d:    0f af de                 imul   %esi,%ebx<br>
        80489a0:    f2 0f 2a c7              cvtsi2sd %edi,%xmm0<br>
        80489a4:    f2 0f 5e 05 f0 8a 04     divsd  0x8048af0,%xmm0<br>
        80489ab:    08 <br>
        80489b5:    f2 0f 2c f0              cvttsd2si %xmm0,%esi<br>
        80489b9:    01 de                    add    %ebx,%esi</tt><br>
      <br>
      I hope that makes it clearer... ;-)<br>
      <br>
      BTW, any hints to my cross-compilation-related question?<br>
      <br>
      Best regards<br>
        Adrian<br>
      <pre wrap="">
<fieldset class="mimeAttachmentHeader"></fieldset>
_______________________________________________
LLVM Developers mailing list
<a class="moz-txt-link-abbreviated" href="mailto:LLVMdev@cs.uiuc.edu">LLVMdev@cs.uiuc.edu</a>         <a class="moz-txt-link-freetext" href="http://llvm.cs.uiuc.edu">http://llvm.cs.uiuc.edu</a>
<a class="moz-txt-link-freetext" href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev</a>
</pre>
    </blockquote>
    <br>
  </body>
</html>