<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - [NVPTX] Invalid PTX when initializing struct from shared-memory pointer"
   href="https://bugs.llvm.org/show_bug.cgi?id=50316">50316</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>[NVPTX] Invalid PTX when initializing struct from shared-memory pointer
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>11.0
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: PTX
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>bugs-llvm@fabian-knorr.info
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Aggregate-initializing a struct from an immediate pointer-to-shared-memory
produces invalid PTX:

    struct wrap { int *mem; };

    __global__ void kernel() {
        __shared__ int mem;
        wrap ptr{&amp;mem};
    }

Compiling with

    clang++ -c bug.cu --cuda-gpu-arch=sm_75 -std=gnu++17 -U__FLOAT128__ -U__SIZEOF_FLOAT128__

results in

        // .globl       _Z6kernelv
    // _ZZ6kernelvE3mem has been demoted
    .global .align 8 .u64 __const_$__Z6kernelv_$_ptr[1] = {generic(_ZZ6kernelvE3mem)};

    .visible .entry _Z6kernelv()
    {
        .local .align 8 .b8     __local_depot0[8];
        .reg .b64       %SP;
        .reg .b64       %SPL;
        .reg .b64       %rd&lt;4&gt;;
        // demoted variable
        .shared .align 4 .u32 _ZZ6kernelvE3mem;
        mov.u64         %SPL, __local_depot0;
        cvta.local.u64  %SP, %SPL;
        mov.u64         %rd1, __const_$__Z6kernelv_$_ptr;
        cvta.global.u64         %rd2, %rd1;
        ld.u64  %rd3, [%rd2];
        st.u64  [%SP+0], %rd3;
        ret;
    }

which ptxas rejects:

    ptxas /tmp/bug-26fed7.s, line 11; fatal   : Invalid initial value expression
    ptxas fatal   : Ptx assembly aborted due to errors

This issue does not appear with -O1/-O2/-O3. In the unoptimized case it can be
worked around by storing the pointer in a temporary first or by adding an
explicit constructor to the struct.

    * Clang versions tested: 11.1.0, 10.0.1
    * CUDA versions tested: 11.3, 10.2
    * Systems tested: x86_64 host and sm_61 / sm_75 GPUs</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>