[llvm] [AArch64] Disable consecutive store merging when Neon is unavailable (PR #111519)

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 8 06:03:33 PDT 2024


paulwalker-arm wrote:

```
; Stores four scalar floats into consecutive 4-byte slots starting at %dest0
; (byte offsets 0, 4, 8 and 12). Each stored value comes from a call to
; @llvm.aarch64.sve.faddv.nxv4f32 on one of the four <vscale x 4 x float>
; arguments. The function carries the "aarch64_pstate_sm_enabled" attribute.
; NOTE(review): presumably a reproducer for the consecutive-store-merging
; behaviour discussed in the PR named in the subject line; the expected
; codegen is not shown here, so confirm against the PR.
define void @consecutive_stores_quadruple(ptr noalias %dest0, <vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2, <vscale x 4 x float> %vec3) "aarch64_pstate_sm_enabled" {
  ; Chained GEPs: each pointer is the previous one advanced by 4 bytes.
  %dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
  %dest2 = getelementptr inbounds i8, ptr %dest1, i64 4
  %dest3 = getelementptr inbounds i8, ptr %dest2, i64 4
  ; One faddv call per input vector; splat(i1 true) supplies an all-true
  ; predicate operand, so (presumably) every lane takes part in the reduction.
  %reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec0)
  %reduce1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec1)
  %reduce2 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec2)
  %reduce3 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec3)
  ; Four adjacent, 4-byte-aligned scalar float stores in ascending address order.
  store float %reduce0, ptr %dest0, align 4
  store float %reduce1, ptr %dest1, align 4
  store float %reduce2, ptr %dest2, align 4
  store float %reduce3, ptr %dest3, align 4
  ret void
}

; Stores four scalar floats into consecutive 4-byte slots starting at %dest0
; (byte offsets 0, 4, 8 and 12). Each stored value comes from a `fast` call to
; the target-independent @llvm.vector.reduce.fadd intrinsic on one of the four
; <vscale x 4 x float> arguments, rather than from an AArch64-specific
; intrinsic. The function carries the "aarch64_pstate_sm_enabled" attribute.
; NOTE(review): presumably a companion reproducer for the PR named in the
; subject line; the expected codegen is not shown here, so confirm against
; the PR.
define void @consecutive_stores_quadruple2(ptr noalias %dest0, <vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2, <vscale x 4 x float> %vec3) "aarch64_pstate_sm_enabled" {
  ; Chained GEPs: each pointer is the previous one advanced by 4 bytes.
  %dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
  %dest2 = getelementptr inbounds i8, ptr %dest1, i64 4
  %dest3 = getelementptr inbounds i8, ptr %dest2, i64 4
  ; One reduce.fadd call per input vector; the first operand is a zero float
  ; start value and the call carries the `fast` fast-math flags.
  %reduce0 = call fast float @llvm.vector.reduce.fadd.f32.nxv4f32(float zeroinitializer, <vscale x 4 x float> %vec0)
  %reduce1 = call fast float @llvm.vector.reduce.fadd.f32.nxv4f32(float zeroinitializer, <vscale x 4 x float> %vec1)
  %reduce2 = call fast float @llvm.vector.reduce.fadd.f32.nxv4f32(float zeroinitializer, <vscale x 4 x float> %vec2)
  %reduce3 = call fast float @llvm.vector.reduce.fadd.f32.nxv4f32(float zeroinitializer, <vscale x 4 x float> %vec3)
  ; Four adjacent, 4-byte-aligned scalar float stores in ascending address order.
  store float %reduce0, ptr %dest0, align 4
  store float %reduce1, ptr %dest1, align 4
  store float %reduce2, ptr %dest2, align 4
  store float %reduce3, ptr %dest3, align 4
  ret void
}
```

https://github.com/llvm/llvm-project/pull/111519


More information about the llvm-commits mailing list