[llvm] [RISCV] Fix missing scaling by LMUL in cost model (PR #73342)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 28 21:20:02 PST 2023
lukel97 wrote:
From the llvm test suite, this reduces the VF (vectorization factor) in a couple of places, e.g. in SingleSource/Regression/C/gcc-c-torture/execute/loop-2d.c:
```diff
--- build.head/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-loop-2d.dir/loop-2d.s 2023-11-27 04:14:09.994315098 +0000
+++ build/SingleSource/Regression/C/gcc-c-torture/execute/CMakeFiles/GCC-C-execute-loop-2d.dir/loop-2d.s 2023-11-27 04:14:04.982461178 +0000
@@ -12,7 +12,7 @@
.Lpcrel_hi0:
auipc a1, %pcrel_hi(a)
addi a2, a1, %pcrel_lo(.Lpcrel_hi0)
- li a1, 16
+ li a1, 8
add a3, a2, a3
bgeu a0, a1, .LBB0_3
# %bb.2:
@@ -21,38 +21,35 @@
.LBB0_3: # %vector.ph
slli a4, a0, 32
srli a4, a4, 32
- andi a5, a4, -16
+ andi a5, a4, -8
slli a1, a5, 2
sub a3, a3, a1
subw a1, a0, a5
- vsetivli zero, 8, e32, m2, ta, ma
- vid.v v8
- vrsub.vi v8, v8, 0
+ vsetivli zero, 4, e32, m1, ta, ma
+ vid.v v9
+ vrsub.vi v8, v9, 0
vadd.vx v8, v8, a0
slli a0, a0, 2
add a0, a0, a2
- addi a0, a0, -64
+ addi a0, a0, -32
li a6, 3
addi a7, a2, -3
- addi t0, a2, -27
- vsetvli zero, zero, e16, m1, ta, ma
- vid.v v10
- vrsub.vi v10, v10, 7
+ addi t0, a2, -15
+ vrsub.vi v9, v9, 3
mv t1, a5
.LBB0_4: # %vector.body
# =>This Inner Loop Header: Depth=1
- vsetvli zero, zero, e32, m2, ta, ma
- vmul.vx v12, v8, a6
- vadd.vx v14, v12, a7
- vadd.vx v12, v12, t0
- vrgatherei16.vv v16, v14, v10
- addi t2, a0, 32
- vse32.v v16, (t2)
- vrgatherei16.vv v14, v12, v10
- vse32.v v14, (a0)
- vadd.vi v8, v8, -16
- addi t1, t1, -16
- addi a0, a0, -64
+ vmul.vx v10, v8, a6
+ vadd.vx v11, v10, a7
+ vadd.vx v10, v10, t0
+ vrgather.vv v12, v11, v9
+ addi t2, a0, 16
+ vse32.v v12, (t2)
+ vrgather.vv v11, v10, v9
+ vse32.v v11, (a0)
+ vadd.vi v8, v8, -8
+ addi t1, t1, -8
+ addi a0, a0, -32
bnez t1, .LBB0_4
# %bb.5: # %middle.block
beq a5, a4, .LBB0_8
```
I presume that in this specific case it's coming from experimental_stepvector being more expensive now.
(As an aside, should we be able to PRE on that `vsetvli zero, zero, e32, m2, ta, ma` in the loop body?)
https://github.com/llvm/llvm-project/pull/73342
More information about the llvm-commits mailing list