[llvm] [AArch64] Lower extending sitofp using tbl (PR #92528)
Shu-Chun Weng via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 28 12:09:49 PDT 2024
scweng wrote:
The .ii file is still over 500k bytes long because Eigen is a header-only library, but the assembly diff boils down to two snippets of exactly this form. Before this commit:
```
fmov s0, w21
mov v0.s[1], w22
shl v0.2s, v0.2s, #24
sshr v0.2s, v0.2s, #24
scvtf v0.2s, v0.2s
fcvtzs w8, s0
str q0, [sp] // 16-byte Folded Spill
cmp w8, w21, sxtb
b.ne .LBB16_10
// %bb.2: // %if.end
// in Loop: Header=BB16_1 Depth=1
// ... function call removed ...
ldr q0, [sp] // 16-byte Folded Reload
mov s0, v0.s[1]
fcvtzs w8, s0
cmp w8, w22, sxtb
b.ne .LBB16_9
```
At this commit:
```
mov v0.b[3], w21
mov v0.b[7], w22
scvtf v0.2s, v0.2s, #24
fcvtzs w8, s0
str q0, [sp] // 16-byte Folded Spill
cmp w8, w21, sxtb
b.ne .LBB16_10
// %bb.2: // %if.end
// in Loop: Header=BB16_1 Depth=1
// ... function call removed ...
ldr q0, [sp] // 16-byte Folded Reload
mov s0, v0.s[1]
fcvtzs w8, s0
cmp w8, w22, sxtb
b.ne .LBB16_9
```
The other part is identical except for the input registers (`w20` and `w19` instead of `w21` and `w22`). Unified diff below:
```
--- array_cwise_no_sitofp_tbl.s 2024-06-29 02:48:01.129830833 +0800
+++ array_cwise_sitofp_tbl.s 2024-06-29 02:50:15.145362022 +0800
@@ -1296,11 +1296,9 @@
bl rand
mov w19, w0
bl rand
- fmov s0, w21
- mov v0.s[1], w22
- shl v0.2s, v0.2s, #24
- sshr v0.2s, v0.2s, #24
- scvtf v0.2s, v0.2s
+ mov v0.b[3], w21
+ mov v0.b[7], w22
+ scvtf v0.2s, v0.2s, #24
fcvtzs w8, s0
str q0, [sp] // 16-byte Folded Spill
cmp w8, w21, sxtb
@@ -1317,13 +1315,11 @@
b.ne .LBB16_9
// %bb.3: // %if.end.1
// in Loop: Header=BB16_1 Depth=1
- fmov s0, w20
+ mov v0.b[3], w20
ldr x8, [x24, :lo12:.L_MergedGlobals+8]
sub x0, x8, #32
- mov v0.s[1], w19
- shl v0.2s, v0.2s, #24
- sshr v0.2s, v0.2s, #24
- scvtf v0.2s, v0.2s
+ mov v0.b[7], w19
+ scvtf v0.2s, v0.2s, #24
str q0, [sp] // 16-byte Folded Spill
bl _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE5c_strEv
ldr q0, [sp] // 16-byte Folded Reload
```
I'm not sure how the two snippets behave differently. And if they do behave differently, it feels like a latent bug that this commit uncovered.
https://github.com/llvm/llvm-project/pull/92528
More information about the llvm-commits mailing list