<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/59463>59463</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Unaligned STG instruction with -fsanitize=memtag
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            backend:AArch64,
            miscompilation
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          ostannard
      </td>
    </tr>
</table>

<pre>
    When this code is compiled with -fsanitize=memtag at any optimisation level other than -O0, the generated code contains unaligned STG and ST2G instructions, which will cause an alignment fault:

```
// Local stand-ins for assert, va_list/va_start/va_end/va_arg and alloca,
// each defined directly in terms of a compiler builtin or an extern hook.
// NOTE(review): presumably this keeps the reproducer free of library headers
// for the bare-metal target used on the command line - confirm.

// Failure hook called by assert() below; only declared here, not defined in
// this snippet.
extern void __aeabi_assert(const char *, const char *, int);
// Evaluates to (void)0 when e is true; otherwise calls __aeabi_assert with the
// stringified expression, the file name and the line number.
#define assert(e) ((e) ? (void)0 : __aeabi_assert(#e, __FILE__, __LINE__))
typedef __builtin_va_list va_list;
#define va_start(ap, param) __builtin_va_start(ap, param)
#define va_end(ap)          __builtin_va_end(ap)
#define va_arg(ap, type)    __builtin_va_arg(ap, type)
#define alloca(n) (__builtin_alloca(n))

// Reproducer body. Combines three things in one variadic function:
//   1. an alloca whose size the optimiser cannot see,
//   2. a va_arg read of one double after the named parameters,
//   3. a fixed-size local whose address escapes.
// P3, P4 and P5 are asserted to be zero, as is the first variadic argument,
// which is read as a double.
// NOTE(review): the statement order and the empty asm statements appear to be
// what provokes the bad epilogue - do not restructure without re-checking the
// generated code.
void F52(int P3, int P4, double P5, ...) {
  // Create a 1-byte alloca, but hide the size from the compiler, then escape a
  // pointer to it.
  int V0 = 1;
  // Empty asm with a read-write register operand: the compiler can no longer
  // assume V0 is still 1 afterwards.
  __asm("" : "+r"(V0));
 char *V1 = (char *)alloca(V0);
  // Empty asm taking V1 as an input operand, so the pointer is treated as used.
  __asm volatile("" : : "r"(V1));

  assert(P3 == 0);
  assert(P4 == 0);
  assert(P5 == 0.0);
  va_list vl;
  va_start(vl, P5);
  // Fetch the first variadic argument as a double.
  double P6 = va_arg(vl, double);
  assert(P6 == 0.0);
  va_end(vl);

  // Escape a pointer to an int on the stack.
  // V8 is never read or written; only its address is passed to the asm.
  int V8;
  __asm volatile("" : : "r"(&V8));
}

// Entry point: calls F52 with zero for the three named parameters plus one
// extra variadic 0.0, which are the values the asserts inside F52 check for.
int main() {
  F52(0, 0, 0.0, 0.0);
  return 0;
}
```

Generated code:
```
$ ~/llvm/build/bin/clang --target=aarch64--none-eabi -march=armv8.5-a+memtag -c test.c -O1 -o - -S -fsanitize=memtag
// ...
F52:                                    // @F52
        .cfi_startproc
// %bb.0:                               // %entry
        .cfi_mte_tagged_frame
        str     d8, [sp, #-48]! // 8-byte Folded Spill
        .cfi_def_cfa_offset 48
 stp     x29, x30, [sp, #8]              // 16-byte Folded Spill
 str     x21, [sp, #24]                  // 8-byte Folded Spill
        stp x20, x19, [sp, #32]             // 16-byte Folded Spill
        add x29, sp, #8
        .cfi_def_cfa w29, 40
        .cfi_offset w19, -8
        .cfi_offset w20, -16
        .cfi_offset w21, -24
 .cfi_offset w30, -32
        .cfi_offset w29, -40
        .cfi_offset b8, -48
        .cfi_remember_state
        sub     sp, sp, #208
// ... skip lots of irrelevant code, which doesn't change x29
.LBB0_11: // %cond.end16
        stg     x20, [x20]
 //APP
        //NO_APP
        st2g    sp, [x29, #-208]  // Fault occurs here
        stg     sp, [x29, #-176] // This would also fault
 sub     sp, x29, #8
        .cfi_def_cfa wsp, 48
        ldp x20, x19, [sp, #32]             // 16-byte Folded Reload
        ldp x29, x30, [sp, #8]              // 16-byte Folded Reload
        ldr x21, [sp, #24]                  // 8-byte Folded Reload
        ldr d8, [sp], #48                   // 8-byte Folded Reload
 .cfi_def_cfa_offset 0
        .cfi_restore w19
        .cfi_restore w20
        .cfi_restore w21
        .cfi_restore w30
 .cfi_restore w29
        .cfi_restore b8
        ret
.LBB0_12: // %vaarg.in_reg
        .cfi_restore_state
 ldr     x9, [x21, #16]
        add     x8, x9, x8
        b .LBB0_9
.Lfunc_end0:
        .size   F52, .Lfunc_end0-F52
 .cfi_endproc
                                        // -- End function
// ...
```

The problem is with the STG and ST2G instructions in the epilogue, whose target addresses must be 16-byte aligned. The offsets in these instructions are multiples of 16, but x29 (used as a base pointer here) was set to sp + 8 in the prologue, so it is not 16-byte aligned relative to the stack pointer.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysWFuPozgW_jXOyxEIDEmRhzzUpdMaqTVd2untfUQGDsTbxo5sk66ah_3tK5tLAkVVj3aWKsWAj7_v-NxyHGYMbyTigWwfyPZpwzp7UvqgjGVSMl1tClW9Hv51Qgn2xA2UqkLwY3vmAiv4ye0JgtowyS3_E0ny1GJrWQPMApOvoM6Wt9wwy5UEgRcUoOwJNdgTkxB8jQh9BOvwG5SomcWqJymVtIxLA51kwulYwR_fPgOTbqSfgUtjdVc6XOMwfnIhoGSdQWAS_JIWpYWadcKS5J5ETyQaP3fR8O8f8cWilnBRvII8Z8gKnjNjUFtCs1JJY6E8MQ2E3jumt2-4tITuSfIwwNOkwppLhAkFCd0Dodl0mxzdo6MkdB8BSe7fUhOaoIPP8-NvXz7leX__5bff_f3e_Xs--3rGCmvI86LjwnKZX1guuLEwjCuaXVhuLPM07OyAz0yz1qk2Q1mXWQFDWQ1ie5iuGdSNxMp6ppuJxu1nwJkhrMi8MbgQqmSEZnIw-BVgNnVd6j-9649bSmjGpYXnZPAqPKfurlJdIRCet-4hDEOPfTfYFAg9EnqER43MIjCIg-LVXlV5hKKzcOIVujgHw_9EqLVq_dOQR3rKAjQlOyOwAXsEPysurcsaBdyG46TT8LsLnieIJx87ozHT-vihhFIfW_72Qfsh-x4NFpiWjMH8PfZgLuyn8N5PhuvXLXjgogSzXOCCsCcdGeMF4wgxxfpz4pgd-YLkKpH-UmI7SYQLmSkhxPzlGN8X4TzgHHy7avT7zltlisBeuJ98T5XdR6r0qeBg3lpkcPinIQ5uPc-k97iSfSBZVv6Yh0L2PzmH0N33bOmfu6dbtRx6y7j00rexPySNr-L9R3gdbnet0XZaQrTCMC_G_efn2ZfBtXwvZGkK_yH0KMSlJfToMr1yo1P0WAomGwgCy3SDliRPjOnytEuDQCqJgSu1ELTunZvT7SULtwEj9GH4AgtKsGhsWELwNYZAQQDBH2tfdaMu3m-uOvhnZ5jkHv7CNawkaeTWDPbqr7CseR-jZ63KGRGh26IIo19zXOVRWv26QtBazC1rGqzyWrMW5yLGaj9WmXMs2T4YX4AJTYI0I9snQuORI-sr31GJyn1dn7kQK3QV1nlZs1zVtUELaTbIGHv2Qi8uFB_hJYmWhI5udXPx7l3mUf0XGi_haPoG7wbzl5tx-r5Qr-RLvF-CJ3QJ_mtdh4tV1WiEm62_a0j42cum0YrMYOSfvYLBGswo0m8liHcfyXgjBjQdZGaTvcOCZC2IJ4Bej490LbJeZE1XjS22BWqXFHYZqF3Rj-eZ5WiULTMUzA9-BqGsAVUD1xoFXpi0fbFxzeSJlyeoFBpJ6J1v9mSD3ikeKvzy8BDlcdwX0jG_SiWrEGW1tKCxzRCCY0S7u-3TrH-4f36er-pf__41fzNjLG2uG_Vw-zEl3W5d3A1aHV33C6osO23ghPpNbjfwHlJ8t3NIA9A31_3_VJ2ogAmjhrZ6SLGZ4a8QH8ZsL7x0sqj-Hzn1DxSKVWvIf7eyrCPrv1td3oO9rbmu0nrgNHuL-hdg12rvWhZqNFZp9CXjg1n64VoafzSbRLc6XRd9RFgsIkWjneUinefihTHdhFzmGpv3UWdlxBncXS_7KRniweTx7pquw-WKtJf2LurXvCx0LKBXbqoadSdL1_1FU0czaeXPBUM_9Qg3osG1K_Cqo6yuzcBKJKxeg2GCAD7JChy2Ozivdy6rDdm3E8JZq0Jg687__tjvutB3z-TA-zYVz1yoprspq21nLBQ4pdZwvA_BcfSBOa42OMdkGqHthOVngb52x7vxgPXSn_c6gxUwAwwKZnDqnn3po3unulR2SQ0aXZt8QddlT731uDjcVIek2id7tsFDvLuLsjTZ7vab06HE-q7Y13GW7VhdJHHJirTc
75ElcXZXZGzDDzSiNHZ_cUzTfUiLOk63UYlFWhY1JiSNsGVchK6FDZVuNtyYDg_bfbpLNoIVKIz_bYbSgpU_3MEhub-_942s794fCaUtN_050v_K4l5vnzb64CCDomsMSSN3-DFXEsutwMM_Z7-s3Bj63V91Np0Wh5O1Z-MC2MdNw-2pK8JStddG3DOftfo3lpbQo9-SIfTod_XfAAAA__9WFBJR">