[llvm] 3ab95e4 - [AArch64] Scalarize v2f16 vecreduce.fadd (#147783)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 10:45:27 PDT 2025
Author: David Green
Date: 2025-07-11T18:45:24+01:00
New Revision: 3ab95e434428661f16c3c4bb416e7f97f926fa50
URL: https://github.com/llvm/llvm-project/commit/3ab95e434428661f16c3c4bb416e7f97f926fa50
DIFF: https://github.com/llvm/llvm-project/commit/3ab95e434428661f16c3c4bb416e7f97f926fa50.diff
LOG: [AArch64] Scalarize v2f16 vecreduce.fadd (#147783)
This adds a custom lowering for v2f16 vecreduce.fadd that scalarizes the
reduction instead of widening the vector by padding it with zeroes. This
allows it to generate the more efficient faddp. Helps with #147583.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5f6832fd2e575..f7de61f044a7d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1352,6 +1352,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
+ if (Subtarget->hasFullFP16())
+ setOperationAction(ISD::VECREDUCE_FADD, MVT::v2f16, Custom);
+
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
@@ -16046,9 +16049,19 @@ static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT,
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // Scalarize v2f16 to turn it into a faddp. This will be more efficient than
+ // widening by inserting zeroes.
+ if (Subtarget->hasFullFP16() && Op.getOpcode() == ISD::VECREDUCE_FADD &&
+ SrcVT == MVT::v2f16) {
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::FADD, DL, MVT::f16,
+ DAG.getExtractVectorElt(DL, MVT::f16, Src, 0),
+ DAG.getExtractVectorElt(DL, MVT::f16, Src, 1));
+ }
// Try to lower fixed length reductions to SVE.
- EVT SrcVT = Src.getValueType();
bool OverrideNEON = !Subtarget->isNeonAvailable() ||
Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 2e993a85760c6..8a84d3ca2328c 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -27,9 +27,6 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) {
; CHECK-SD-FP16-LABEL: add_v2HalfH:
; CHECK-SD-FP16: // %bb.0:
; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-FP16-NEXT: mov v0.h[2], wzr
-; CHECK-SD-FP16-NEXT: mov v0.h[3], wzr
-; CHECK-SD-FP16-NEXT: faddp v0.4h, v0.4h, v0.4h
; CHECK-SD-FP16-NEXT: faddp h0, v0.2h
; CHECK-SD-FP16-NEXT: ret
;
More information about the llvm-commits mailing list