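<!doctype html>
<html lang="en">
    <head>
      <meta charset="utf-8">
      <title>Bug 37483 - clang++ CUDA miscompile with -O1</title>
      <base href="https://bugs.llvm.org/">
    </head>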
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - clang++ CUDA miscompile with -O1"
   href="https://bugs.llvm.org/show_bug.cgi?id=37483">37483</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>clang++ CUDA miscompile with -O1
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>new-bugs
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>6.0
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>new bugs
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>wangxy381@buaa.edu.cn
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Created <span class=""><a href="attachment.cgi?id=20309" name="attach_20309" title="test case that triggers the bug">attachment 20309</a> <a href="attachment.cgi?id=20309&amp;action=edit" title="test case that triggers the bug">[details]</a></span>
test case that triggers the bug

I have encountered a miscompile of a CUDA program with clang 6.0. It occurs
with optimization level '-O1' and disappears with '-O2' and '-O3'.
I have reduced the test case, which is appended below.

****************************demo.cu********************************
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda.h"

#include &lt;stdio.h&gt;

struct S1 {
    long shoud_not_change;
    long irrelevant;
};

__device__ int * func_3(int ** p_221, struct S1 * ptr_arg)
{
    long *l_302 = &ptr_arg->irrelevant;
    (*l_302) = 0;
    return *p_221;
}

__device__ void func_2(struct S1 * ptr_arg)
{
    int *l_231 = (int*)0;
    func_3(&l_231, ptr_arg);
}


__device__ void func_1(struct S1 * ptr_arg)
{
    func_2(ptr_arg);
}

extern "C" __global__ void entry(long *result)
{
    struct S1 c_783 = {
        11L, // shoud_not_change  
        0L, // irrelevant
    };
    result[0] = (long)c_783.shoud_not_change;
    struct S1* ptr_arg = &c_783;
    __syncthreads();
    func_1(ptr_arg);
    __syncthreads();
    result[1] = (long)ptr_arg->shoud_not_change;
}
int main(void)
{
    long h_result[2];
    long* result;
    size_t result_size = sizeof(long)*2;
    cudaMalloc((void**)&result,result_size);
    entry&lt;&lt;&lt;100,100&gt;&gt;&gt;(result);
    cudaMemcpy(&h_result,result,result_size,cudaMemcpyDeviceToHost);
    printf("result[0] is %ld, result[1] is %ld\n", h_result[0], h_result[1]);

}
------------------------------------------------------------------------------
The failing and passing behaviors are:
-----------------------------------------------------------------------------
FAIL
$clang++ demo.cu -O1 -o bug --cuda-gpu-arch=sm_50 
--cuda-path=/usr/local/cuda-8.0 
-I/usr/local/cuda-8.0/targets/x86_64-linux/include/  -L/usr/local/cuda/lib64
-lcudart -lcuda

$./bug

output:        result[0] is 11, result[1] is 0
-----------------------------------------------------------------------------
PASS
$clang++ demo.cu -O2 -o bugfree-O2 --cuda-gpu-arch=sm_50 
--cuda-path=/usr/local/cuda-8.0 
-I/usr/local/cuda-8.0/targets/x86_64-linux/include/  -L/usr/local/cuda/lib64
-lcudart -lcuda

$./bugfree-O2

output:        result[0] is 11, result[1] is 11
-------------------------------------------------------------------------------
PASS
$clang++ demo.cu -O3 -o bugfree-O3 --cuda-gpu-arch=sm_50 
--cuda-path=/usr/local/cuda-8.0 
-I/usr/local/cuda-8.0/targets/x86_64-linux/include/  -L/usr/local/cuda/lib64
-lcudart -lcuda

$./bugfree-O3

output:        result[0] is 11, result[1] is 11
------------------------------------------------------------------------------



Configuration->
------------------------------------------------------------------------------
#uname -a
Linux G1024-workstation1 4.4.0-121-generic #145-Ubuntu SMP Fri Apr 13 13:47:23
UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
------------------------------------------------------------------------------
$ clang++ --version
clang version 6.0.0 (tags/RELEASE_600/final)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
-------------------------------------------------------------------------------
cat /etc/*release*/
DISTRIB_ID=Ubuntu
DISTRIB_RELEASE=16.04
DISTRIB_CODENAME=xenial
DISTRIB_DESCRIPTION="Ubuntu 16.04.2 LTS"
NAME="Ubuntu"
VERSION="16.04.2 LTS (Xenial Xerus)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 16.04.2 LTS"
VERSION_ID="16.04"
HOME_URL="<a href="http://www.ubuntu.com/">http://www.ubuntu.com/</a>"
SUPPORT_URL="<a href="http://help.ubuntu.com/">http://help.ubuntu.com/</a>"
BUG_REPORT_URL="<a href="http://bugs.launchpad.net/ubuntu/">http://bugs.launchpad.net/ubuntu/</a>"
VERSION_CODENAME=xenial
UBUNTU_CODENAME=xenial
Device: NVIDIA Quadro K2200
-------------------------------------------------------------------------------
#nvidia-smi
Wed May 16 15:07:23 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 384.111                Driver Version: 384.111                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  Quadro K2200        Off  | 00000000:03:00.0 Off |                  N/A |
| 42%   52C    P0     2W /  39W |      0MiB /  4040MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+

CUDA toolkit version:release 8.0, V8.0.61
------------------------------------------------------------------------------</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>