[llvm] a2bd372 - [AArch64] Recurse further into performUADDVCombine
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 7 11:14:13 PST 2023
Author: David Green
Date: 2023-02-07T19:14:06Z
New Revision: a2bd37230706ecb164c7ebe5cac66f0fd80b778d
URL: https://github.com/llvm/llvm-project/commit/a2bd37230706ecb164c7ebe5cac66f0fd80b778d
DIFF: https://github.com/llvm/llvm-project/commit/a2bd37230706ecb164c7ebe5cac66f0fd80b778d.diff
LOG: [AArch64] Recurse further into performUADDVCombine
This adjusts performUADDVCombine to recurse further into the adds feeding a
reduction, which can help produce more UADDLP when the order of lanes is
unimportant. It also prevents some regressions from subsequent patches.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/vecreduce-add.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 74f446767027c..edea7f01b510e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15281,7 +15281,7 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
// matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
// into UADDV(UADDLP(x)). This can also happen through an extra add, where we
// transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
-static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) {
auto DetectAddExtract = [&](SDValue A) {
// Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
// UADDLP(x) if found.
@@ -15315,22 +15315,27 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0));
};
- SDValue A = N->getOperand(0);
if (SDValue R = DetectAddExtract(A))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
- if (A.getOpcode() == ISD::ADD) {
- if (SDValue R = DetectAddExtract(A.getOperand(0)))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
- A.getOperand(1)));
- if (SDValue R = DetectAddExtract(A.getOperand(1)))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
- A.getOperand(0)));
- }
+ return R;
+
+ if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
+ if (SDValue R = performUADDVAddCombine(A.getOperand(0), DAG))
+ return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+ A.getOperand(1));
+ if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
+ if (SDValue R = performUADDVAddCombine(A.getOperand(1), DAG))
+ return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+ A.getOperand(0));
return SDValue();
}
+static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue A = N->getOperand(0);
+ if (A.getOpcode() == ISD::ADD)
+ if (SDValue R = performUADDVAddCombine(A, DAG))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
+ return SDValue();
+}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 8316c881dd484..7b4da9ecf3cd4 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -2039,17 +2039,11 @@ entry:
define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i16> %ay, <8 x i16> %bx, <8 x i16> %by) {
; CHECK-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v5.4s, v0.4h, #0
-; CHECK-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-NEXT: ushll v6.4s, v1.4h, #0
-; CHECK-NEXT: uaddw2 v0.4s, v5.4s, v0.8h
-; CHECK-NEXT: ushll v5.4s, v3.4h, #0
-; CHECK-NEXT: uaddw2 v1.4s, v6.4s, v1.8h
-; CHECK-NEXT: uaddw2 v2.4s, v4.4s, v2.8h
-; CHECK-NEXT: uaddw2 v3.4s, v5.4s, v3.8h
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: add v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uaddlp v3.4s, v3.8h
+; CHECK-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-NEXT: uadalp v3.4s, v2.8h
+; CHECK-NEXT: uadalp v1.4s, v0.8h
+; CHECK-NEXT: add v0.4s, v3.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
More information about the llvm-commits
mailing list