<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/83552>83552</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [nvptx] Why does clang generate an empty ptx file?
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            clang
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          hlyix
      </td>
    </tr>
</table>

<pre>
    
```
#include "__clang_cuda_builtin_vars.h"
#include "stdio.h"
// Trip count of the j/m/n loops in the kernel below; also the initial value of min_index there.
#define num_kpt 5
// Value the kernel below stores into matching B_PVT entries.
#define MAX_DISTANCE 10000
// Expands to a `for` header that declares `int i`.
//  - start: a flattened index built from blockIdx (x, then y, then z across the grid),
//           scaled by the number of threads per block, plus the flattened threadIdx
//           within the block.
//  - bound: loop continues while i < (n).
//  - step:  product of all three blockDim and all three gridDim components,
//           i.e. the total number of threads in the launch.
#define CUDA_1D_KERNEL_LOOP(i, n) \
  for (int i = (blockDim.x * blockDim.y * blockDim.z) * blockIdx.z * gridDim.x * gridDim.y + (blockDim.x * blockDim.y * blockDim.z) * blockIdx.y * gridDim.x + (blockDim.x * blockDim.y * blockDim.z) * blockIdx.x + threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * blockDim.y * blockDim.z * gridDim.x * gridDim.y * gridDim.z)

// Reproducer kernel for the "empty PTX" report.
//
// Parameters:
//   address - not referenced anywhere in the body.
//   dst     - output; element [threadIdx.x] is written inside the outer loop.
//   n1      - multiplied by threadIdx.x to seed one element of each local array.
//   m1      - not referenced anywhere in the body.
//   num_roi - upper bound passed to CUDA_1D_KERNEL_LOOP.
//
// NOTE(review): several array accesses below index outside the declared
// 5x5 extents (see inline notes). Out-of-bounds access is undefined
// behavior in C++, which is presumably why an optimizing compiler may
// discard the body - confirm against the issue discussion.
__global__ void test(int *__restrict__ address, int *__restrict__ dst, int n1, int m1, int num_roi) {
  // Per-thread local 5x5 arrays; declared without an initializer.
  int B_PVT[5][5];
  int A_PVT[5][5];
  // Only the single diagonal element [threadIdx.x % 5][threadIdx.x % 5] of each
  // array is assigned here; every other element is read later without having
  // been written.
  B_PVT[threadIdx.x % 5][threadIdx.x % 5] = n1 * threadIdx.x;
  A_PVT[threadIdx.x % 5][threadIdx.x % 5] = n1 * threadIdx.x;
  // The loop variable `i` is not used inside the loop body.
  CUDA_1D_KERNEL_LOOP(i, num_roi) {
#pragma unroll
 // `j` is not used inside this loop; each iteration repeats the same work.
 for (int j = 0; j < num_kpt; ++j) {
      // a. find the minimum distance and index in unmatched keypoints
      // min_index is fixed at num_kpt (5) and never updated.
      int min_index = num_kpt;
 // A_PVT[min_index][4] uses row index 5 on an array whose valid rows are 0..4
 // (out of bounds). B_PVT is additionally indexed by the raw threadIdx.x and
 // by whatever value that read yields, neither of which is reduced to 0..4.
 int matched_heatmap_index = B_PVT[threadIdx.x][A_PVT[min_index][4]];
#pragma unroll
      for (int m = 0; m < num_kpt; ++m) {
#pragma unroll
        for (int n = 0; n < num_kpt; ++n) {
          // Overwrite B_PVT[m][n] wherever A_PVT[m][n] equals the looked-up value times 996.
          if (A_PVT[m][n] == matched_heatmap_index * 996) {
            B_PVT[m][n] = MAX_DISTANCE;
          }
        }
 }
    }
    // Both indices are raw thread coordinates; values >= 5 fall outside B_PVT.
    dst[threadIdx.x] = B_PVT[threadIdx.x][threadIdx.y];
 }
}

// Host driver for the reproducer: fills two host buffers, copies them to the
// device, launches `test` once on a user-created stream, and prints one
// element copied back from the device.
//
// NOTE(review): none of the CUDA API return values are checked, there is no
// launch-error query after the kernel launch, and the host buffers, device
// buffers and stream are never released before returning.
int main() {

  cudaStream_t stream;
 cudaStreamCreate(&stream);
  int num = 5000;

  // Host buffers of `num` ints each: a is zero-filled, b holds 1..num.
  int *a = (int *)malloc(num * sizeof(int));
  int *b = (int *)malloc(num * sizeof(int));
  for (int i = 0; i < num; i++) {
    a[i] = 0;
 b[i] = i + 1;
  }
  // Device buffers: c receives a copy of a, d receives a copy of b.
  int *c;
  int *d;
  cudaMalloc((void **)&c, sizeof(int) * num);
  cudaMemcpy(c, a, num * sizeof(int), cudaMemcpyHostToDevice);
  cudaMalloc((void **)&d, sizeof(int) * num);
 cudaMemcpy(d, b, num * sizeof(int), cudaMemcpyHostToDevice);

 // 4 blocks of 128 threads, no dynamic shared memory, on `stream`.
 // Arguments map to (address=c, dst=d, n1=0, m1=64, num_roi=1).
 test<<<4, 128, 0, stream>>>(c, d, 0, 64, 1);

 // Wait for the kernel to finish before reading results back.
 cudaStreamSynchronize(stream);
  // Copies back c (passed as `address`), not d (passed as `dst`); the kernel
  // above only stores through `dst`.
  cudaMemcpy(a, c, num * sizeof(int), cudaMemcpyDeviceToHost);
  printf("%d", a[1]);
  return 0;
}

```
clang++ -O2  test.cu --cuda-device-only -S -o test.s


With clang I get:
![image](https://github.com/llvm/llvm-project/assets/31645239/0f986ae0-d263-485c-8336-31049939259a)


but nvcc gets:
![image](https://github.com/llvm/llvm-project/assets/31645239/b307e16b-22e1-425c-bc84-d2765e343501)





</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy0V11vqzgQ_TXOi0UEYyDhIQ8kabRXe7-07X68IYOdxC2YCJuq6a9f2UCAhHa7qi5CgO2Zw8yZYxuoUuIgOV-hYI2C7YzW-lhWq2N-Fi-ztGTnFXK3yI1R6LZn0wQiZJbXjGMEkCRZTuUhyWpGk7QWuRYyeaaVmh8RwJSH0kyU41HG90JyLOsieTppHFwPfIv_SbZf7h_i75s77Lmu615bbP7cxom3TX6_--P73dfk648fPxEsBYINlggijIJN44LxvqywGZMaC4zI1jTSvMyetqKYv2AEMb40z-Pmq4Xqer6wl_mrbR4qwXrvrmWc159CP9-gfw6vQdDHilPWR_8e3ND6fGs9HH5BZD3k2R6G4Y2J2VThDQNYmyp8NKn_ILxvmfRbmdhrkhzyMqV5kuDnUjCsudKtDhDESVJxpSuR6STBlLGKK2XUMzXMjGMzJL3uqbg8GRlXpbDsL9ZdtmZknfz86wEF6wAF2-5GRhbxuxad_4B0jCDArfVUt1W49Cwz41p1oPGvAH1vPk7Qg4CcKnooKK5lVeZ5CzOYqo_2na6R0KPVVLtYWE3BGsH68YpwcyDYIdhhOsd7IRnWR44LIUVRF5gJpanMOKaSYSEZf8FC4loWVGdHzvATP59KIbUa4tlCC5k09paFSxitnTVpMJIjp7qgp4H5RAUbmrsqXNCbbt_eeg28QZQ9BmwVPVvFNFvFR_jHN8CyB5bTwHKiDJa6vcG4pNmkJ1sxGdA3SIMYR1H4Bmg_Ja4AR1vGQJfdgRbbcVffMRoaNcy8vy7d-1UdLJ6jmXyB7R_stRGPkAiW4_q0MZhd9l5XnBaJxso-9KD94KbiVHMLErZWdvUdrjSybkQSmN2UXL2nXfVotz-2bQRRQfO8zMyCbvwhxkq88nLf2Ji33LwIQZx-Fudmz7YKFJ0CbaOR341QKArWoitUnylOh_12F8Le4I2DwrdBZ7d5sUGXYf9blxSCpd1jbK6xTSfMzOJ3laXN3CQwytZC8SI7nREsrRttF85ppjYDj99KpR_KLX8WGZ9AfS9A9tEAR_FZr_TT8bXQdlMmm-b0jatngt1g1wbXKv6uPVt22MUgbFwmgPvJcX-W2bEqpXg1M2Rqeozys9RnH86vyeyhNHmOUU-VkHpvuQcEAbO3jdWnZxaHkXHFdV3JgV6vFoqrb3L7Dd5MAOz8AGx5nGc1dhwTmsNsVE4p8zN27rFTNgZqBGmvX_CB625T8MwcKeiB2_iWR61PCpG42VUPQh_rdJ6VBYJdnj93N-dUlY880wh2VCmuFYId8UI_ABIh2Ln7aBlS7joMQuL4yyBzloSEDvFcP4pIBEFEr77cmmtaayyfs8wGSOJfGWNK3AX3wtQB4J7jQ5A5abb0HQaLMODEJ4HrTcY4vM7YirCIRHTGV97CjWC59ILF7LjyfbaIKNsvIh5CFgJnqR9llPgsZQG4fCZW4ILvEtfzXC8IFnM_9JYuTdkewr0fAkO-ywsq8rnJZV5Wh5lQquarJQkCmOU05bmyf3UArTLA_OBVK5t7Wh8U8t1cKK16BC10bn8F5fNJ253t7-MZs1JxbDHwgUteUc0xL076jE_6Be9FzhHZzeoqX_1v4m3Ihngb9b8BAAD__5QUExg">