[llvm] [AArch64] Lower extending sitofp using tbl (PR #92528)

Shu-Chun Weng via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 28 12:09:49 PDT 2024


scweng wrote:

The .ii file is still over 500k bytes long because Eigen is a header-only library. However, I've seen that the assembly diff boils down to two snippets exactly like this one. Before this commit:

```
          fmov    s0, w21
          mov     v0.s[1], w22
          shl     v0.2s, v0.2s, #24
          sshr    v0.2s, v0.2s, #24
          scvtf   v0.2s, v0.2s
          fcvtzs  w8, s0
          str     q0, [sp]                        // 16-byte Folded Spill
          cmp     w8, w21, sxtb
          b.ne    .LBB16_10
  // %bb.2:                               // %if.end
                                          //   in Loop: Header=BB16_1 Depth=1
          // ... function call removed ...
          ldr     q0, [sp]                        // 16-byte Folded Reload
          mov     s0, v0.s[1]
          fcvtzs  w8, s0
          cmp     w8, w22, sxtb
          b.ne    .LBB16_9
```

At this commit:

```
          mov     v0.b[3], w21
          mov     v0.b[7], w22
          scvtf   v0.2s, v0.2s, #24
          fcvtzs  w8, s0
          str     q0, [sp]                        // 16-byte Folded Spill
          cmp     w8, w21, sxtb
          b.ne    .LBB16_10
  // %bb.2:                               // %if.end
                                          //   in Loop: Header=BB16_1 Depth=1
          // ... function call removed ...
          ldr     q0, [sp]                        // 16-byte Folded Reload
          mov     s0, v0.s[1]
          fcvtzs  w8, s0
          cmp     w8, w22, sxtb
          b.ne    .LBB16_9
```

The other part is identical except for the input registers (`w20` and `w19` instead of `w21` and `w22`). Unified diff below:

```
--- array_cwise_no_sitofp_tbl.s 2024-06-29 02:48:01.129830833 +0800
+++ array_cwise_sitofp_tbl.s    2024-06-29 02:50:15.145362022 +0800
@@ -1296,11 +1296,9 @@
        bl      rand
        mov     w19, w0
        bl      rand
-       fmov    s0, w21
-       mov     v0.s[1], w22
-       shl     v0.2s, v0.2s, #24
-       sshr    v0.2s, v0.2s, #24
-       scvtf   v0.2s, v0.2s
+       mov     v0.b[3], w21
+       mov     v0.b[7], w22
+       scvtf   v0.2s, v0.2s, #24
        fcvtzs  w8, s0
        str     q0, [sp]                        // 16-byte Folded Spill
        cmp     w8, w21, sxtb
@@ -1317,13 +1315,11 @@
        b.ne    .LBB16_9
 // %bb.3:                               // %if.end.1
                                         //   in Loop: Header=BB16_1 Depth=1
-       fmov    s0, w20
+       mov     v0.b[3], w20
        ldr     x8, [x24, :lo12:.L_MergedGlobals+8]
        sub     x0, x8, #32
-       mov     v0.s[1], w19
-       shl     v0.2s, v0.2s, #24
-       sshr    v0.2s, v0.2s, #24
-       scvtf   v0.2s, v0.2s
+       mov     v0.b[7], w19
+       scvtf   v0.2s, v0.2s, #24
        str     q0, [sp]                        // 16-byte Folded Spill
        bl      _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE5c_strEv
        ldr     q0, [sp]                        // 16-byte Folded Reload
```

I'm not sure how the two snippets behave differently. And if they do behave differently, it feels like a latent bug uncovered by this commit?

https://github.com/llvm/llvm-project/pull/92528


More information about the llvm-commits mailing list