<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/103484">103484</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            clang-cl generates suboptimal code for extern __declspec(thread) thread-local variables
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          mcfi
      </td>
    </tr>
</table>

<pre>
    Consider this example: https://godbolt.org/z/fEjrzrW8n
```
struct A {
    int a;
};

extern __declspec(thread) struct A *a;

struct A*
getA(void)
{
    return a;
}
```
clang-cl 19.1.0-rc2 generates the following when targeting x64:
```
getA: # @getA
.seh_proc getA
# %bb.0:
        sub     rsp, 40
        .seh_stackalloc 40
        .seh_endprologue
        mov     eax, dword ptr [rip + _tls_index]
        mov     rcx, qword ptr gs:[88]
        mov     rax, qword ptr [rcx + 8*rax]
        cmp     byte ptr [rax + __tls_guard@SECREL32], 0
        jne     .LBB0_2
# %bb.1:
        call    __dyn_tls_on_demand_init
.LBB0_2:
        mov     eax, dword ptr [rip + _tls_index]
        mov     rcx, qword ptr gs:[88]
        mov     rax, qword ptr [rcx + 8*rax]
        mov     rax, qword ptr [rax + a@SECREL32]
        add     rsp, 40
        ret
```
and the following when targeting arm64:
```
getA: // @getA
.seh_proc getA
// %bb.0:
        str     x19, [sp, #-16]!                // 8-byte Folded Spill
        .seh_save_reg_x x19, 16
        str     x30, [sp, #8] // 8-byte Folded Spill
        .seh_save_reg   x30, 8
        .seh_endprologue
        adrp    x19, _tls_index
        ldr     x9, [x18, #88]
        ldr     w8, [x19, :lo12:_tls_index]
        ldr     x8, [x9, x8, lsl #3]
        add     x8, x8, :secrel_hi12:__tls_guard
        ldrb    w8, [x8, :secrel_lo12:__tls_guard]
        cbnz    w8, .LBB0_2
// %bb.1:
        bl      __dyn_tls_on_demand_init
.LBB0_2:
        ldr     w8, [x19, :lo12:_tls_index]
        ldr     x9, [x18, #88]
        ldr     x8, [x9, x8, lsl #3]
        add     x8, x8, :secrel_hi12:a
        ldr     x0, [x8, :secrel_lo12:a]
        .seh_startepilogue
        ldr     x30, [sp, #8] // 8-byte Folded Reload
        .seh_save_reg   x30, 8
        ldr     x19, [sp], #16                  // 8-byte Folded Reload
        .seh_save_reg_x x19, 16
        .seh_endepilogue
        ret
```
Compared to the MSVC-generated code below, the code clang-cl generates is suboptimal. If you look at the code generated by MSVC, there's no call to __dyn_tls_on_demand_init. In general, MSVC doesn't call __dyn_tls_on_demand_init if a thread-local variable doesn't have a constructor.
```
x64 getA    PROC ; COMDAT
        mov     ecx, DWORD PTR _tls_index
        mov rax, QWORD PTR gs:88
        mov     edx, OFFSET FLAT:a
        mov rax, QWORD PTR [rax+rcx*8]
        mov     rax, QWORD PTR [rdx+rax]
 ret     0
getA    ENDP

arm64 |getA|  PROC
        adrp x8,_tls_index
        ldr         w9,[x8,_tls_index]
        ldr x8,[xpr,#0x58]
        ldr         x8,[x8,w9 uxtw #3]
        add x8,x8,a,lsl #0xC
        ldr         x0,[x8,a]
 ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzMV12TojgX_jXx5pRWCKhw4YUfbdVUzbw9b3fXziUVSERmImGToPT8-q0EFKS1p2d3L9ayRMh5nnNycr6gWudZwfkCTVdouhnRyuylWhzSXT5KJHtdrGWhc8YVmH2ugdf0UAoOe2NKjfwlIltEtplkiRRmIlWGyPYnItvdw3f1U30LC4Q3CC_RDLdfd6uNqlIDS0DzVfMEACAvDFDkt0_QfNP9d7-8NlwVEMeMp0KXPEUkNHvFKUMkgo6TLOkAeV5DpH2QcbNEJDzK3GLPCnu2KG4qVQzMubWVVNAiG6cCvGjiTfBYpQQyXnBFDddg9hx2Ugh5yosMTntegKEq48be1rPgJqUzzrcb8QEF2N26hYnm-7hUMoXumRMi0ySZYHsc7aYjXSUIR0qXiKwhwJfnjkIbmv6gQsj0zRIvWKmkkFnFLwsHeUQ44rS2XOwkFYPSKEDTlcpLQGQFsRE6zgvGazTdDHAqdbg_L7jMxc10FYY3hOlA2CpJa6ckRGRp13ug9FAiHCWvhl-EaSMcO5OyiiqGAvz8sH56-OwTiyVr6Pb8veB2559XKxyToUO9vkNTKgTCURyz18JxyyJm_EALFudFbtoDaol6uP-u894BtU6kA9edkZSxm8GluLkZ0LRgkHAhT8MMoOrwyxyw9eUjadDI3cgEoxCOai-yxqLpqrEaEX_szVw8eDD4tFzh2AXWVgrGGTyXuQ2AqzSiRx4rnsV1p8CbDRX7eKjYHt1vK-mowl9nLGWq7GzqBdhZQDBn29kntReebbuKq0bsFF7EGoC_FNKzUX47dFvyC8qBmluhhdXiv42mZr0F-UvNU8VFvM8bPb1s7mlJrowbQM8m9qD9ypEUPy_oQf73IumqBCR_qwD8Ix9-7ID-TVfTITd-37-0z3_uLsrwMr8OyZbtd7LhiQtJ2cfToVXRz_Sm3iPie7Nhlt_J8_eU3kn0cxq-2fO9eriWh5IqzsBI-PL8x3p8HhgYpJLxplZaJZfhopsodJXI0uQHKpzsBD7t4FVWIKT8AdS4icORdJzJq9NiCc2eK47IXEMhwbY0oBnNC23gXlxP4FPRcgnLYJmASa4LROam4biHhXwHFJoRbSxkSgUcqcppIniPYk-PHCiksmjGNKkmN71WzwJX8O3RfX16tLG4gvXjl83ypZvb7Ocgj-7Km-a5-fb4tIGvL09vCmEf0PbB_1-EXacNwzvUzEk_brfPDy-w_bx86VLnXdamuyKycp192SXzUMNNJHPIXhu3c6oT77VNe_vwv83X_gDsGi2g-dp1zPm68eC1Zts0mqpwz0-CqUvynGwWnMvCjRrWxzgZK1sq-4f4uJ6-3Xmf_YKw11MEVW1O19Wss5o10u6HIrJuCx-u1-_w4x4_vXLnIPZGbOGzyI_oiC-8OfGnYRj589F-EQYJnbFdwv3Am2Hfozgl8zTahSnDU4bJKF8QTAIcej4hAfHmE8YxScL5NAoDykhgB1N-oLmYCHE82HenUa51xRce9oMwGAmacKHdexkhBT-BW0XEjmIjtbCgcVJlGgVY5NrojsbkRvDFr4sH7KSC99-qbiavHlVKLAavgLnZV8kklQdEttaS9jIulfzOU4PI1tmvEdm2GzwuyF8BAAD__xi9Ejk">