[llvm] Add `llvm.vector.partial.reduce.fadd` intrinsic (PR #159776)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 2 09:33:37 PST 2025
================
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -global-isel -global-isel-abort=2 -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
+; RUN: llc -global-isel -global-isel-abort=2 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
+
+target triple = "aarch64-linux-gnu"
+
+define <vscale x 4 x float> @fdot_wide_nxv4f32(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; SVE2-LABEL: fdot_wide_nxv4f32:
+; SVE2: // %bb.0: // %entry
+; SVE2-NEXT: uunpklo z3.s, z1.h
+; SVE2-NEXT: uunpklo z4.s, z2.h
+; SVE2-NEXT: ptrue p0.s
+; SVE2-NEXT: uunpkhi z1.s, z1.h
+; SVE2-NEXT: uunpkhi z2.s, z2.h
+; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
+; SVE2-NEXT: fcvt z4.s, p0/m, z4.h
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
+; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
+; SVE2-NEXT: fmul z3.s, z3.s, z4.s
+; SVE2-NEXT: fmul z1.s, z1.s, z2.s
+; SVE2-NEXT: fadd z0.s, z0.s, z3.s
+; SVE2-NEXT: fadd z0.s, z0.s, z1.s
+; SVE2-NEXT: ret
+;
+; SVE2P1-LABEL: fdot_wide_nxv4f32:
+; SVE2P1: // %bb.0: // %entry
+; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
+; SVE2P1-NEXT: ret
+entry:
+ %a.wide = fpext <vscale x 8 x half> %a to <vscale x 8 x float>
+ %b.wide = fpext <vscale x 8 x half> %b to <vscale x 8 x float>
+ %mult = fmul <vscale x 8 x float> %a.wide, %b.wide
+ %partial.reduce = call <vscale x 4 x float> @llvm.vector.partial.reduce.fadd(<vscale x 4 x float> %acc, <vscale x 8 x float> %mult)
+ ret <vscale x 4 x float> %partial.reduce
----------------
paulwalker-arm wrote:
I think it's the add part of the equation that matters, which in this case is the partial reduction intrinsic (although I preferred the option of making the flag explicit within the IR). See https://godbolt.org/z/bx4bj1qTM
https://github.com/llvm/llvm-project/pull/159776
More information about the llvm-commits mailing list