<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/83552>83552</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[nvptx] Why does clang generate an empty PTX file?
</td>
</tr>
<tr>
<th>Labels</th>
<td>
clang
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
hlyix
</td>
</tr>
</table>
<pre>
```
#include "__clang_cuda_builtin_vars.h"
#include "stdio.h"
// Used by the kernel in this file as the bound of its j/m/n loops and as the initial value of min_index.
#define num_kpt 5
// Value the kernel in this file stores into matching B_PVT entries.
#define MAX_DISTANCE 10000
// Expands to a `for` header declaring `int i`.
// Start value: (threads per block) * (flattened 3-D block index) + (flattened 3-D thread index).
// Step: the product of all blockDim and gridDim components, i.e. the total number of launched
// threads, so the expansion is a grid-stride loop that runs while i is below (n).
// NOTE(review): the arithmetic is done in the type of the builtin index variables and then
// converted to int; confirm the result fits in int for the intended launch sizes.
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = (blockDim.x * blockDim.y * blockDim.z) * blockIdx.z * gridDim.x * gridDim.y + (blockDim.x * blockDim.y * blockDim.z) * blockIdx.y * gridDim.x + (blockDim.x * blockDim.y * blockDim.z) * blockIdx.x + threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; \
i < (n); \
i += blockDim.x * blockDim.y * blockDim.z * gridDim.x * gridDim.y * gridDim.z)
// Reproducer kernel for the reported empty-PTX output.
// It writes one diagonal entry of two local 5x5 int arrays, runs CUDA_1D_KERNEL_LOOP over
// num_roi, conditionally overwrites B_PVT entries with MAX_DISTANCE, and stores one B_PVT
// entry to dst[threadIdx.x].
// Parameters:
//   address  - not referenced in the body.
//   dst      - output; written at index threadIdx.x inside the loop.
//   n1       - multiplied by threadIdx.x to form the values stored in A_PVT and B_PVT.
//   m1       - not referenced in the body.
//   num_roi  - upper bound passed to CUDA_1D_KERNEL_LOOP.
// NOTE(review): the body reads array elements that were never written and indexes both arrays
// outside their 5x5 extent (see the notes on individual lines). That is undefined behaviour in
// C++, and is possibly why an optimizing compiler emits no code for the kernel - confirm.
__global__ void test(int *__restrict__ address, int *__restrict__ dst, int n1, int m1, int num_roi) {
int B_PVT[5][5];
int A_PVT[5][5];
// Only the element [threadIdx.x % 5][threadIdx.x % 5] of each array is assigned; the other
// 24 elements of each array are never initialized in this kernel.
B_PVT[threadIdx.x % 5][threadIdx.x % 5] = n1 * threadIdx.x;
A_PVT[threadIdx.x % 5][threadIdx.x % 5] = n1 * threadIdx.x;
CUDA_1D_KERNEL_LOOP(i, num_roi) {
#pragma unroll
for (int j = 0; j < num_kpt; ++j) {
// a. find the minimum distance and index in unmatched keypoints
// min_index is set to num_kpt (5), so A_PVT[min_index][4] indexes row 5 of a 5-row array.
// B_PVT[threadIdx.x] uses threadIdx.x without the "% 5" applied above; the launch in this
// file uses 128 threads per block, so most threads index past row 4.
int min_index = num_kpt;
int matched_heatmap_index = B_PVT[threadIdx.x][A_PVT[min_index][4]];
// Scan every A_PVT entry and overwrite the B_PVT entry at the same position with
// MAX_DISTANCE when the A_PVT entry equals matched_heatmap_index * 996.
#pragma unroll
for (int m = 0; m < num_kpt; ++m) {
#pragma unroll
for (int n = 0; n < num_kpt; ++n) {
if (A_PVT[m][n] == matched_heatmap_index * 996) {
B_PVT[m][n] = MAX_DISTANCE;
}
}
}
}
// Indexed by threadIdx.x (not the loop variable i) and again without "% 5" on the array side.
dst[threadIdx.x] = B_PVT[threadIdx.x][threadIdx.y];
}
}
// Host driver for the reproducer: creates a stream, fills two 5000-int host buffers, copies
// them to two device buffers, launches test on the stream, waits for it, copies one device
// buffer back and prints a[1]. Always returns 0.
// NOTE(review): no return value from malloc or any CUDA runtime call is checked, and nothing
// is released (no free, cudaFree or cudaStreamDestroy) before returning.
int main() {
cudaStream_t stream;
cudaStreamCreate(&stream);
int num = 5000;
int *a = (int *)malloc(num * sizeof(int));
int *b = (int *)malloc(num * sizeof(int));
// a is zero-filled; b holds 1..num.
for (int i = 0; i < num; i++) {
a[i] = 0;
b[i] = i + 1;
}
int *c;
int *d;
// c receives a copy of a, d receives a copy of b.
cudaMalloc((void **)&c, sizeof(int) * num);
cudaMemcpy(c, a, num * sizeof(int), cudaMemcpyHostToDevice);
cudaMalloc((void **)&d, sizeof(int) * num);
cudaMemcpy(d, b, num * sizeof(int), cudaMemcpyHostToDevice);
// 4 blocks of 128 threads, no dynamic shared memory; arguments are address=c, dst=d, n1=0,
// m1=64, num_roi=1.
test<<<4, 128, 0, stream>>>(c, d, 0, 64, 1);
cudaStreamSynchronize(stream);
// NOTE(review): this copies back c, which is passed as the kernel's address parameter; the
// kernel body only writes through dst (d here). Confirm whether d was the intended source.
cudaMemcpy(a, c, num * sizeof(int), cudaMemcpyDeviceToHost);
printf("%d", a[1]);
return 0;
}
```
clang++ -O2 test.cu --cuda-device-only -S -o test.s
With this command, clang gives me:
![image](https://github.com/llvm/llvm-project/assets/31645239/0f986ae0-d263-485c-8336-31049939259a)
but nvcc gets:
![image](https://github.com/llvm/llvm-project/assets/31645239/b307e16b-22e1-425c-bc84-d2765e343501)
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy0V11vqzgQ_TXOi0UEYyDhIQ8kabRXe7-07X68IYOdxC2YCJuq6a9f2UCAhHa7qi5CgO2Zw8yZYxuoUuIgOV-hYI2C7YzW-lhWq2N-Fi-ztGTnFXK3yI1R6LZn0wQiZJbXjGMEkCRZTuUhyWpGk7QWuRYyeaaVmh8RwJSH0kyU41HG90JyLOsieTppHFwPfIv_SbZf7h_i75s77Lmu615bbP7cxom3TX6_--P73dfk648fPxEsBYINlggijIJN44LxvqywGZMaC4zI1jTSvMyetqKYv2AEMb40z-Pmq4Xqer6wl_mrbR4qwXrvrmWc159CP9-gfw6vQdDHilPWR_8e3ND6fGs9HH5BZD3k2R6G4Y2J2VThDQNYmyp8NKn_ILxvmfRbmdhrkhzyMqV5kuDnUjCsudKtDhDESVJxpSuR6STBlLGKK2XUMzXMjGMzJL3uqbg8GRlXpbDsL9ZdtmZknfz86wEF6wAF2-5GRhbxuxad_4B0jCDArfVUt1W49Cwz41p1oPGvAH1vPk7Qg4CcKnooKK5lVeZ5CzOYqo_2na6R0KPVVLtYWE3BGsH68YpwcyDYIdhhOsd7IRnWR44LIUVRF5gJpanMOKaSYSEZf8FC4loWVGdHzvATP59KIbUa4tlCC5k09paFSxitnTVpMJIjp7qgp4H5RAUbmrsqXNCbbt_eeg28QZQ9BmwVPVvFNFvFR_jHN8CyB5bTwHKiDJa6vcG4pNmkJ1sxGdA3SIMYR1H4Bmg_Ja4AR1vGQJfdgRbbcVffMRoaNcy8vy7d-1UdLJ6jmXyB7R_stRGPkAiW4_q0MZhd9l5XnBaJxso-9KD94KbiVHMLErZWdvUdrjSybkQSmN2UXL2nXfVotz-2bQRRQfO8zMyCbvwhxkq88nLf2Ji33LwIQZx-Fudmz7YKFJ0CbaOR341QKArWoitUnylOh_12F8Le4I2DwrdBZ7d5sUGXYf9blxSCpd1jbK6xTSfMzOJ3laXN3CQwytZC8SI7nREsrRttF85ppjYDj99KpR_KLX8WGZ9AfS9A9tEAR_FZr_TT8bXQdlMmm-b0jatngt1g1wbXKv6uPVt22MUgbFwmgPvJcX-W2bEqpXg1M2Rqeozys9RnH86vyeyhNHmOUU-VkHpvuQcEAbO3jdWnZxaHkXHFdV3JgV6vFoqrb3L7Dd5MAOz8AGx5nGc1dhwTmsNsVE4p8zN27rFTNgZqBGmvX_CB625T8MwcKeiB2_iWR61PCpG42VUPQh_rdJ6VBYJdnj93N-dUlY880wh2VCmuFYId8UI_ABIh2Ln7aBlS7joMQuL4yyBzloSEDvFcP4pIBEFEr77cmmtaayyfs8wGSOJfGWNK3AX3wtQB4J7jQ5A5abb0HQaLMODEJ4HrTcY4vM7YirCIRHTGV97CjWC59ILF7LjyfbaIKNsvIh5CFgJnqR9llPgsZQG4fCZW4ILvEtfzXC8IFnM_9JYuTdkewr0fAkO-ywsq8rnJZV5Wh5lQquarJQkCmOU05bmyf3UArTLA_OBVK5t7Wh8U8t1cKK16BC10bn8F5fNJ253t7-MZs1JxbDHwgUteUc0xL076jE_6Be9FzhHZzeoqX_1v4m3Ihngb9b8BAAD__5QUExg">