[llvm] a2bd372 - [AArch64] Recurse further into performUADDVCombine

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 7 11:14:13 PST 2023


Author: David Green
Date: 2023-02-07T19:14:06Z
New Revision: a2bd37230706ecb164c7ebe5cac66f0fd80b778d

URL: https://github.com/llvm/llvm-project/commit/a2bd37230706ecb164c7ebe5cac66f0fd80b778d
DIFF: https://github.com/llvm/llvm-project/commit/a2bd37230706ecb164c7ebe5cac66f0fd80b778d.diff

LOG: [AArch64] Recurse further into performUADDVCombine

This adjusts performUADDVCombine to recurse further into adds from
reductions, which can help produce more UADDLP when the order of lanes is
unimportant. It also prevents some regressions from subsequent patches.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/vecreduce-add.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 74f446767027c..edea7f01b510e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15281,7 +15281,7 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
 // matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
 // into UADDV(UADDLP(x)). This can also happen through an extra add, where we
 // transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
-static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) {
   auto DetectAddExtract = [&](SDValue A) {
     // Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
     // UADDLP(x) if found.
@@ -15315,22 +15315,27 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
     return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0));
   };
 
-  SDValue A = N->getOperand(0);
   if (SDValue R = DetectAddExtract(A))
-    return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
-  if (A.getOpcode() == ISD::ADD) {
-    if (SDValue R = DetectAddExtract(A.getOperand(0)))
-      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
-                         DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
-                                     A.getOperand(1)));
-    if (SDValue R = DetectAddExtract(A.getOperand(1)))
-      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
-                         DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
-                                     A.getOperand(0)));
-  }
+    return R;
+
+  if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
+    if (SDValue R = performUADDVAddCombine(A.getOperand(0), DAG))
+      return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+                         A.getOperand(1));
+  if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
+    if (SDValue R = performUADDVAddCombine(A.getOperand(1), DAG))
+      return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+                         A.getOperand(0));
   return SDValue();
 }
 
+static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue A = N->getOperand(0);
+  if (A.getOpcode() == ISD::ADD)
+    if (SDValue R = performUADDVAddCombine(A, DAG))
+      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
+  return SDValue();
+}
 
 static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,

diff  --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 8316c881dd484..7b4da9ecf3cd4 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -2039,17 +2039,11 @@ entry:
 define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i16> %ay, <8 x i16> %bx, <8 x i16> %by) {
 ; CHECK-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v5.4s, v0.4h, #0
-; CHECK-NEXT:    ushll v4.4s, v2.4h, #0
-; CHECK-NEXT:    ushll v6.4s, v1.4h, #0
-; CHECK-NEXT:    uaddw2 v0.4s, v5.4s, v0.8h
-; CHECK-NEXT:    ushll v5.4s, v3.4h, #0
-; CHECK-NEXT:    uaddw2 v1.4s, v6.4s, v1.8h
-; CHECK-NEXT:    uaddw2 v2.4s, v4.4s, v2.8h
-; CHECK-NEXT:    uaddw2 v3.4s, v5.4s, v3.8h
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    add v1.4s, v2.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uaddlp v3.4s, v3.8h
+; CHECK-NEXT:    uaddlp v1.4s, v1.8h
+; CHECK-NEXT:    uadalp v3.4s, v2.8h
+; CHECK-NEXT:    uadalp v1.4s, v0.8h
+; CHECK-NEXT:    add v0.4s, v3.4s, v1.4s
 ; CHECK-NEXT:    addv s0, v0.4s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret


        


More information about the llvm-commits mailing list