[llvm] [AArch64] Add MATCH loops to LoopIdiomVectorizePass (PR #101976)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 01:21:10 PST 2024
rj-jesus wrote:
Hi @david-arm and @paulwalker-arm, I've rebased this patch to use the version of `@llvm.experimental.vector.match` committed last week. I've also added a simple cost model which currently only lets the transformation kick in for supported SVE vectors.
Currently, a simple doubly nested find loop like the one in the description should compile to something like:
```gas
// find_first_of_i8 -- compiler-generated AArch64 code (SVE, using the SVE2
// MATCH instruction) for a nested byte-search loop.
//
// NOTE(review): no signature is visible here; the register roles below are
// inferred from how the code uses them and should be confirmed against the
// source loop:
//   x0 = current position in the searched byte range, x1 = end of that range
//   x2 = start of the candidate byte range,            x3 = end of that range
//
// Result in x0:
//   - .LBB0_8: x0 advanced by the lane index of the first matching byte
//   - .LBB0_7: x1, when x0 == x1 or x2 == x3 on entry, or when both loops
//              finish without MATCH reporting a hit
find_first_of_i8: // @find_first_of_i8
.cfi_startproc
// %bb.0:
cmp x0, x1
b.eq .LBB0_7 // x0 == x1: nothing to search, return x1
// %bb.1:
cmp x2, x3
b.eq .LBB0_7 // x2 == x3: no candidates, return x1
// %bb.2: // %.preheader
ptrue p0.b, vl16 // p0 = first 16 byte lanes; every access below is capped to 16 bytes
.LBB0_3: // =>This Loop Header: Depth=1
// Child Loop BB0_4 Depth 2
whilelo p1.b, x0, x1 // p1 lane i active while x0 + i < x1 (unsigned)
mov x8, x2 // x8 and x9 both restart at x2 for each outer iteration;
mov x9, x2 // x8 feeds the inner whilelo, x9 is the inner load address
and p1.b, p0/z, p0.b, p1.b // p1 &= p0: keep at most 16 lanes
ld1b { z0.b }, p1/z, [x0] // z0 = up to 16 bytes at x0; inactive lanes are zeroed
.LBB0_4: // Parent Loop BB0_3 Depth=1
// => This Inner Loop Header: Depth=2
whilelo p2.b, x8, x3 // p2 lane i active while x8 + i < x3 (unsigned)
and p2.b, p0/z, p0.b, p2.b // p2 &= p0: keep at most 16 lanes
ld1b { z1.b }, p2/z, [x9] // z1 = up to 16 bytes at x9; inactive lanes are zeroed
mov z2.b, b1 // z2 = byte 0 of z1 broadcast to every lane
sel z1.b, p2, z1.b, z2.b // inactive z1 lanes take byte 0 instead of the zero fill,
                         // so padding lanes only repeat a byte that was actually loaded
mov z1.q, q1 // replicate the low 128 bits of z1 into every 128-bit segment
match p2.b, p1/z, z0.b, z1.b // p2 lane set where an active z0 byte equals any byte in
                             // the corresponding 128-bit segment of z1; sets flags
b.ne .LBB0_8 // taken when at least one active lane matched
// %bb.5: // in Loop: Header=BB0_4 Depth=2
add x9, x9, #16 // step the load address and the whilelo base together
add x8, x8, #16
cmp x9, x3
b.lo .LBB0_4 // unsigned x9 < x3: more bytes to load from the x2..x3 range
// %bb.6: // in Loop: Header=BB0_3 Depth=1
add x0, x0, #16 // advance to the next 16 bytes of the x0..x1 range
cmp x0, x1
b.lo .LBB0_3 // unsigned x0 < x1: keep scanning
.LBB0_7: // %.loopexit1
mov x0, x1 // no match (or an empty range): return x1
ret
.LBB0_8: // %.loopexit
ptrue p0.b // all lanes active as the governing predicate for brkb
brkb p0.b, p0/z, p2.b // p0 = lanes strictly before the first set lane of p2
incp x0, p0.b // x0 += number of set lanes in p0 = index of the first match
ret
```
Could you please let me know if you have any thoughts or suggestions? Many thanks in advance!
https://github.com/llvm/llvm-project/pull/101976
More information about the llvm-commits mailing list