[llvm] [AArch64] Disable consecutive store merging when Neon is unavailable (PR #111519)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 8 06:03:33 PDT 2024
paulwalker-arm wrote:
```
; Reproducer: four SVE `faddv` reductions, each taken under an all-true
; predicate (splat(i1 true)), are written to four adjacent float slots.
; The destination pointers are built by chained 4-byte i8 GEPs, so the stores
; land at %dest0 + 0, + 4, + 8 and + 12 bytes, i.e. one contiguous 16-byte run.
; NOTE(review): the "aarch64_pstate_sm_enabled" attribute presumably puts the
; function in streaming mode, the "Neon is unavailable" case named in the PR
; title -- confirm against the AArch64 SME documentation.
define void @consecutive_stores_quadruple(
    ptr noalias %dest0,
    <vscale x 4 x float> %vec0,
    <vscale x 4 x float> %vec1,
    <vscale x 4 x float> %vec2,
    <vscale x 4 x float> %vec3) "aarch64_pstate_sm_enabled" {
entry:
  ; Addresses of the three slots that follow %dest0 (each one 4 bytes further).
  %slot1 = getelementptr inbounds i8, ptr %dest0, i64 4
  %slot2 = getelementptr inbounds i8, ptr %slot1, i64 4
  %slot3 = getelementptr inbounds i8, ptr %slot2, i64 4

  ; One scalar float per input vector.
  %sum0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec0)
  %sum1 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec1)
  %sum2 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec2)
  %sum3 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec3)

  ; Four consecutive, 4-byte-aligned scalar stores.
  store float %sum0, ptr %dest0, align 4
  store float %sum1, ptr %slot1, align 4
  store float %sum2, ptr %slot2, align 4
  store float %sum3, ptr %slot3, align 4
  ret void
}
; Variant of the consecutive-store reproducer that uses the generic
; `llvm.vector.reduce.fadd` intrinsic (called with the `fast` flags and a
; +0.0 start value) instead of a target-specific SVE reduction.
; The four scalar results are stored at %dest0 + 0, + 4, + 8 and + 12 bytes;
; the destination pointers are built by chained 4-byte i8 GEPs.
; NOTE(review): the "aarch64_pstate_sm_enabled" attribute presumably puts the
; function in streaming mode, the "Neon is unavailable" case named in the PR
; title -- confirm against the AArch64 SME documentation.
define void @consecutive_stores_quadruple2(ptr noalias %dest0, <vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2, <vscale x 4 x float> %vec3) "aarch64_pstate_sm_enabled" {
  ; Addresses of the three slots that follow %dest0 (each one 4 bytes further).
  %dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
  %dest2 = getelementptr inbounds i8, ptr %dest1, i64 4
  %dest3 = getelementptr inbounds i8, ptr %dest2, i64 4

  ; llvm.vector.reduce.fadd is overloaded on the vector operand type only, so
  ; the canonical name carries a single ".nxv4f32" suffix.
  %reduce0 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %vec0)
  %reduce1 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %vec1)
  %reduce2 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %vec2)
  %reduce3 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %vec3)

  ; Four consecutive, 4-byte-aligned scalar stores.
  store float %reduce0, ptr %dest0, align 4
  store float %reduce1, ptr %dest1, align 4
  store float %reduce2, ptr %dest2, align 4
  store float %reduce3, ptr %dest3, align 4
  ret void
}
```
https://github.com/llvm/llvm-project/pull/111519
More information about the llvm-commits
mailing list