<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/95811>95811</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            LLVM generates unaligned access with -mstrict-align on AArch64
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          vit9696
      </td>
    </tr>
</table>

<pre>
    Consider the following command-line:

```
clang -o t.o -c -target aarch64-gnu-linux-eabi -mstrict-align -mcpu=cortex-a55 -O3 t.c 
```

(1) Example 1 ([GodBolt](https://godbolt.org/z/35s8h7Kdz)):

<cut>

<details>

```c
/*
 * Record type stored in struct2's trailing array.  bug() reads `ggg` and
 * `kkk[0..3]`; func3() follows `ddd` as a link to another struct1.
 * With 8-byte pointers and 4-byte `unsigned` (the AArch64 target used in
 * this report) `kkk` starts at byte offset 58 of the record, so it is only
 * 2-byte aligned.
 */
typedef struct
{
  void* ccc[3];
  void* mmm;
  void* ddd;
  unsigned eee[2];
  _Bool bbb[8];
  
  _Bool ggg;
  _Bool abs;
 unsigned char kkk[16];
} struct1;

/*
 * Container handed to func1() and bug().  It ends in a flexible array
 * member of struct1 records.  With 8-byte pointers and 4-byte `unsigned int`
 * the array starts at byte offset 104, which matches the displacements in
 * the assembly listing of this report (e.g. 136 = 104 + offset of `ddd`).
 */
typedef struct struct2
{
 void* block[3];
  unsigned char pass_flags[2];
  unsigned int index[19];

  struct1 yyy[];
} struct2;


/*
 * Overlays a plain `unsigned` word (`u32`) with a struct of bit-fields whose
 * widths add up to 32 bits: one 24-bit field followed by four 2-bit fields.
 * bug() fills the bit-fields; func3() reads the value back through `u32`.
 */
union union1
{
  unsigned u32;
  struct
  {
    unsigned ggg : 24;
    unsigned ffffffx : 2;
    unsigned ffffffy : 2;
    unsigned ffffffz : 2;
    unsigned ffffffw : 2;
  } aaa;
};

/* Declared only: no definition is part of this reproducer.  Called by
 * func3() with the packed union value. */
void func2(unsigned value);

/* Declared only: no definition is part of this reproducer.  bug() calls it
 * once per loop iteration and compares the result against 4. */
unsigned func1(struct2* instr, unsigned yyy);

/*
 * Passes `zzzz.u32` to func2(), then, if `yyy->ddd` is non-null, recurses on
 * that pointer with a zero-initialised union.
 *
 * yyy   record whose `ddd` link is followed.
 * zzzz  packed value forwarded to func2() for this record.
 */
static void func3(struct1* yyy, union union1 zzzz)
{
 func2(zzzz.u32);
  if (yyy->ddd)
  {
    /* This inner `zzzz` shadows the parameter; every linked record is
     * therefore reported to func2() as 0. */
    union union1 zzzz = {0};
    func3(yyy->ddd, zzzz);
  }
}

/* Prototype for the function that exhibits the problem (defined just below). */
void bug(struct2* aaa);

/*
 * Reproducer entry point.  For each of the first 256 records of `aaa->yyy`
 * it packs fields of the record into a `union union1` and hands it to
 * func3().  The loop indexes yyy[0..255] unconditionally.
 *
 * aaa  container whose trailing `yyy` array is walked.
 */
void bug(struct2* aaa)
{
  for (unsigned i = 0; i < 256; i++)
  {
    unsigned a = func1(aaa, i);
    union union1 yyy;

    yyy.aaa.ggg = aaa->yyy[i].ggg;

    /* These four single-byte reads of kkk[0..3] are the accesses that the
     * reported output combines into one 4-byte load (`ldur s0, [x8, #162]`).
     * NOTE(review): when a > 4 the four 2-bit fields are never assigned
     * before `yyy` is passed by value to func3(). */
    if (a <= 4)
 {
      yyy.aaa.ffffffx = aaa->yyy[i].kkk[0];
      yyy.aaa.ffffffy = aaa->yyy[i].kkk[1];
      yyy.aaa.ffffffz = aaa->yyy[i].kkk[2];
 yyy.aaa.ffffffw = aaa->yyy[i].kkk[3];
    }

    func3(&aaa->yyy[i], yyy);
 }
}
```

</details>

</cut>

Output:

<cut>

<details>

```
.LCPI0_0:
 .word   24                              // 0x18
        .word   26 // 0x1a
        .word   28 // 0x1c
        .word   30                              // 0x1e
bug:                                    // @bug
        sub sp, sp, #80
        stp     x29, x30, [sp, #16]             // 16-byte Folded Spill
        str     x23, [sp, #32]                  // 8-byte Folded Spill
        stp     x22, x21, [sp, #48]             // 16-byte Folded Spill
        stp     x20, x19, [sp, #64]             // 16-byte Folded Spill
        add     x29, sp, #16
        mov     x19, x0
 mov     x20, xzr
        mov     w22, #80                        // =0x50
        adrp    x8, .LCPI0_0
        ldr     q0, [x8, :lo12:.LCPI0_0]
        str     q0, [sp]                        // 16-byte Folded Spill
        b       .LBB0_2
.LBB0_1: //   in Loop: Header=BB0_2 Depth=1
        add     x20, x20, #1
 cmp     x20, #256
        b.eq    .LBB0_7
.LBB0_2: // =>This Loop Header: Depth=1
        mov     x0, x19
 mov     w1, w20
        bl      func1
        cmp     w0, #4
 b.hi    .LBB0_4
        madd    x8, x20, x22, x19
        ldr q1, [sp]                        // 16-byte Folded Reload
        ldur s0, [x8, #162]
        ushll   v0.8h, v0.8b, #0
        bic     v0.4h, #252
        ushll   v0.4s, v0.4h, #0
        ushl    v0.4s, v0.4s, v1.4s
        ext     v1.16b, v0.16b, v0.16b, #8
        orr     v0.8b, v0.8b, v1.8b
        fmov    x8, d0
        lsr     x9, x8, #32
 orr     w8, w8, w9
        b       .LBB0_5
.LBB0_4: //   in Loop: Header=BB0_2 Depth=1
        and     w8, w21, #0xff000000
.LBB0_5:                                //   in Loop: Header=BB0_2 Depth=1
        madd    x9, x20, x22, x19
        ldrb w9, [x9, #160]
        orr     w21, w8, w9
        mov     w0, w21
 bl      func2
        madd    x8, x20, x22, x19
        ldr x23, [x8, #136]
        cbz     x23, .LBB0_1
.LBB0_6: //   Parent Loop BB0_2 Depth=1
        mov     w0, wzr
 bl      func2
        ldr     x23, [x23, #32]
        cbnz    x23, .LBB0_6
        b       .LBB0_1
.LBB0_7:
        ldp     x20, x19, [sp, #64]             // 16-byte Folded Reload
        ldp     x22, x21, [sp, #48]             // 16-byte Folded Reload
        ldp     x29, x30, [sp, #16]             // 16-byte Folded Reload
        ldr     x23, [sp, #32]                  // 8-byte Folded Reload
        add     sp, sp, #80
        ret
```

</details>

</cut>

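Note `ldur s0, [x8, #162]`: this is a single 4-byte load of `kkk[0..3]`, but `kkk` sits at offset 162 from the start of `struct2` (record stride 80), so the address is only 2-byte aligned.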

(2) Example 2 ([GodBolt](https://godbolt.org/z/nsovb6s1f)):

Source:

<cut>

<details>

```c
/*
 * Stores the negation of each of the first four chars of `q` into `p`.
 * In the output listed in this report, the four reads of `q` are emitted as
 * one 4-byte load (`ldr s1, [x1]`).
 */
void f(char p[restrict], char *q)
{
    for (int i = 0; i < 4; i++)
 p[i] = -q[i];
}

/*
 * Copies the first four chars of `q` into `p`.  Included for contrast with
 * f(): the output listed in this report uses only byte loads and stores here.
 */
void g(char p[restrict], char *q)
{
    for (int i = 0; i < 4; i++)
        p[i] = q[i];
}
```

</details>

</cut>

Output:

<cut>

<details>

```
f: // @f
        movi    v0.2d, #0000000000000000
        ldr     s1, [x1]
        usubw   v0.8h, v0.8h, v1.8b
        umov    w8, v0.h[3]
        umov    w9, v0.h[2]
        umov    w10, v0.h[1]
        umov    w11, v0.h[0]
 strb    w8, [x0, #3]
        strb    w9, [x0, #2]
        strb w10, [x0, #1]
        strb    w11, [x0]
        ret
g: // @g
        ldrb    w8, [x1, #3]
        ldrb w9, [x1, #2]
        ldrb    w10, [x1, #1]
        ldrb    w11, [x1]
        strb    w8, [x0, #3]
        strb    w9, [x0, #2]
 strb    w10, [x0, #1]
        strb    w11, [x0]
 ret
```

</details>
</cut>

Note `ldr s1, [x1]`: this is a 4-byte load through `q`, a `char *` that is only guaranteed to be 1-byte aligned.

Tested on 18.1.8.

This results in an unaligned access exception being raised on targets that have strict alignment checking enabled in hardware.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzMWVuvo7oV_jXOixWEzSXwkIdcZrdVpz1H7VFfRwY7hA6BbGxy-_WVbS7GkOzpzFbVKAoEvrX8rZuXMYTzPCsZW4NgC4L9gjTiWNXrSy7iMA4XSUXv611V8pyyGoojg4eqKKprXmYwrU4nUtJlkZcMeBvg7oHb_YZu-1V_04KUGVxWUDgVXKZwKUidMQEJqdNj6C-zspFamtuSkSSHyxMXdZ6KJSnyrITLU3pugLdPq1qw25IEAVz-5kHhpHB2tPYXRwjgGH65kdO5YBBBgCOA8Z8quq0KATAG3uYoxJlL7vgN4LesoklVCKeqM4DfHgC_eQGPjqu_0gfAsW2it0sbAbwv1kXKBMkLbt_oKKb6v7ifGWUHyEXdpKLFrLb6BMJLlVOANzBNUxBsPRDsgTe5eTqdZq5SSo2rTanCSyFjDARbPNb0bVtVBUySBATbaHxrDMmybCJGEj5c68dJj6SG379_B8EWhaZKsNq31qLh2owv2gO2fdLZlxRV-n3qlDGBM-H826EgGZ8a3SPzUsC8pOwmucYjri205Qvv97sujxlrsCWlf5syr0qoftEkuj2DxsMGMTMXIDTwhkSWZRB4G4h9Q9C4f1Cfm8a8gtw_hjw-hlwnEOkYQojpKMtBMpDw0JQpBjjq9V1I0TBVZduxH7vxmjJFAEed0_EG5iUXNcC7gZSM00QFF0TkKeyH9XotSGpRMjtoBgw-Hg9V8VbgOtLytiNjZ4wFYX6QM8z9fl8C74uswk7BJJbWSBB4ewlxTV9JZMfW1LnryY18Pjjb9nTSZGO3yehMnPQSaufvoaqhGbtcWeACb6tOdxAHofoD8FZ9nzmilSdKvouwGnQnhWMr9Qy_yajZ1QrlVYcQ4ugqUYkoHafLNwfB3jEnskFOh07S2Ekxf2A8IjwMMJTZ3CB6-nPH085U_P5SHH0k_ngpPp70xpLXl5KePbCVVWZmAhxO1MjgWYU4l6GzPdvbAfz2pIOqe9OW-1sjzvLiZzRn_df5uvv9L-43t9cJnWtVUwgh9uHLj15GQPeGIjNucFAQGhjyBBMZmHQe47k_SoRpBbK0vc1roZEo8F0pMxqdNwnkZxlf_QuwF7kWRJzV8YZjibh5rgIG215CrQrmRkThMrkLBt-qgjIK_3nOi8JWXrfKPVurh22tpuroY80dbaxoY2QP4Ee_QLtTrpxxQ7GtPPR_Wjmh1HS46eYR7lRdNE6Pfuvi1l9vyT3qebmr9owK-Uep4-3dW-DaPGvlhVsk1fQlNsIUVMf3vUsaDQbepqiQXK73YsF-PjPejXSby4f_0rlJe3S-brfuN9xND_IPkuXUKoIwL-HXqjrLa39mhLIaeHslAvfsLI7A26NnYdNu1wcZtxaXnkZJA7Anm-qYncPeB3Yrkx022AFvD7wvfxxzrjj2BDfPuPUp0afrOFWuqjiu2IpeUuij7uOjW50x184Wv72fOMd8MMG3eLRO0lnQewqPWRm5845-Pvr_YEVFqK2zqSG3klGWFp4kYMOPhbT_4jrRUeLkSdIK2I7KU3W8uI5_7IOLnyr0eauwR7tTLLSx-gTJkxGY3YQeHTkoTFr05EzW-ViuquuOdZSMLLwgeTICH9pc0T6jdp3zdh7XU1E0zOEa1g11VXfa3_hVaQZm8vu_WJolNUdvWwH23Nvh4KqPOVjwA23157n0JRD_WAkk0k9tusZ9uk7ny97D2rp5F_f17naOaIvWKHT8iyU7tPKhvLxwwjdNHjpjNLybgo04hKOg_05qVgo937128NjGvvu9sLHrUgb19qxdhtjUy8eEuj2Tj5J5ZNdqWIX243_WcmJ-zvusldBr7b-6PJzX_inrwznVXb_-YPlbM_HJjzd_rwSDIHRf96LQdUbKcITNvU_8M3ufJa8uScjRYbr3-ZnPXKmxAXEAONL7dyDY
1kzvArePlOo6wJv3uR2Jfk9C7enZ2xH-7GbEuX1eVejle_f0am5c2Rsk2f-SX_sZ0XzB8v_xebqN7cFcjfruYTIFqzXgxXUw7Xqt9Zmvc97NTTc0syBrkqu9IDs-Wa40bRvQffDiOsduH2QeFhuwmbVgB0OugZuh2OOQgTOaNRd1MtCSdnbr5ym3Hhvb2ClBhW3ZmcgpxV4rQgPYBvWTXjaOczazPBlZg55aYy1l0FNTeqWDNeipNQP4ReJ8utMHH_6yw3-qvXzYWGq7kMZq_2BcMAqrEqLIQU406jTqsbJmvCkEl8vbplTv7BiFJE0Z55DdUnYWeVXCmuRc69Fv_zi85uII9SQKldhJLtpYSZJCvZ6BR1LTK6mZs6Brj8ZeTBZsjVYoCn0vRv7iuEb0EK5Y4gZ-EkcBDigLUEhjisNVEMcHvMjX2MW-G6IVWrkB8p3YTxLqe_QQuiENwgD4LjuRvHCK4nKS7W-Rc96wdRxECC0KkrCCq5ejGJfsCtVN2UCD_aJeS5ll0mQc-G6Rc8EHLSIXBVt__fqvv8GMlawmgvGpe5QLrPedVQk3G_VidNHUxdrq07k4NomTVieA3-Rg7WF5rqt_s1QA_KYocoDftAmXNf5PAAAA___YDXX_">