[llvm] 0e346ee - [DAG] fold avgu(zext(x), zext(y)) -> zext(avgu(x, y)) (#95134)

via llvm-commits <llvm-commits at lists.llvm.org>
Wed Jun 12 04:58:53 PDT 2024


Author: c8ef
Date: 2024-06-12T12:58:49+01:00
New Revision: 0e346eeac676d909402abe01fb23248bb3efc5e0

URL: https://github.com/llvm/llvm-project/commit/0e346eeac676d909402abe01fb23248bb3efc5e0
DIFF: https://github.com/llvm/llvm-project/commit/0e346eeac676d909402abe01fb23248bb3efc5e0.diff

LOG: [DAG] fold avgu(zext(x), zext(y)) -> zext(avgu(x, y)) (#95134)

close: #86301
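
The fold is sound because the unsigned average of two zero-extended values
always fits back in the source width: for N-bit x and y, even the
rounding-up average is at most ((2^N - 1) + (2^N - 1) + 1) >> 1 = 2^N - 1,
so the average can be computed in the narrow type and zero-extended
afterwards. A minimal C++ check of the i8 case, covering both this bound and
the overflow-free identities the new tests are written in (illustrative
only, not part of the commit):

  #include <cassert>

  int main() {
    // Exhaustively check every pair of i8 values.
    for (unsigned X = 0; X <= 255; ++X) {
      for (unsigned Y = 0; Y <= 255; ++Y) {
        unsigned Floor = (X + Y) >> 1;    // avgflooru in the wide type
        unsigned Ceil = (X + Y + 1) >> 1; // avgceilu in the wide type
        // Both results still fit in 8 bits, so narrowing is lossless.
        assert(Floor <= 255 && Ceil <= 255);
        // The identities the tests below use to express the averages
        // without an overflowing intermediate add.
        assert(Floor == ((X & Y) + ((X ^ Y) >> 1)));
        assert(Ceil == ((X | Y) - ((X ^ Y) >> 1)));
      }
    }
    return 0;
  }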

Added: 
    llvm/test/CodeGen/AArch64/avg.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4fcbe08e4b2b9..36fd8c136a3c5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5236,6 +5236,23 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
     return DAG.getNode(ISD::SRL, DL, VT, X,
                        DAG.getShiftAmountConstant(1, VT, DL));
 
+  // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y))
+  SDValue A;
+  SDValue B;
+  if (sd_match(
+          N, m_BinOp(ISD::AVGFLOORU, m_ZExt(m_Value(A)), m_ZExt(m_Value(B)))) &&
+      A.getValueType() == B.getValueType() &&
+      hasOperation(ISD::AVGFLOORU, A.getValueType())) {
+    SDValue AvgFloorU = DAG.getNode(ISD::AVGFLOORU, DL, A.getValueType(), A, B);
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgFloorU);
+  }
+  if (sd_match(
+          N, m_BinOp(ISD::AVGCEILU, m_ZExt(m_Value(A)), m_ZExt(m_Value(B)))) &&
+      A.getValueType() == B.getValueType() &&
+      hasOperation(ISD::AVGCEILU, A.getValueType())) {
+    SDValue AvgCeilU = DAG.getNode(ISD::AVGCEILU, DL, A.getValueType(), A, B);
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgCeilU);
+  }
   return SDValue();
 }
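
Both arms of the new combine are guarded by hasOperation, so the fold only
fires when the target actually provides the unsigned average at the narrower
type. The two arms differ only in the opcode they match and rebuild; a
hypothetical generic form (a sketch, not the committed code) shows the shape
of the fold in one place:

  // Hypothetical rewrite: visitAVG already knows N's opcode, so one match
  // can cover both AVGFLOORU and AVGCEILU. Illustrative sketch only.
  unsigned Opc = N->getOpcode();
  SDValue A, B;
  if ((Opc == ISD::AVGFLOORU || Opc == ISD::AVGCEILU) &&
      sd_match(N, m_BinOp(Opc, m_ZExt(m_Value(A)), m_ZExt(m_Value(B)))) &&
      A.getValueType() == B.getValueType() &&
      hasOperation(Opc, A.getValueType()))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
                       DAG.getNode(Opc, DL, A.getValueType(), A, B));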
 

diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index f36b8440fe4bf..b2cf089d8145f 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -9,9 +9,8 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
 define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-LABEL: haddu_zext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    uhadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -23,9 +22,8 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-LABEL: rhaddu_zext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    urhadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
   %x1 = zext <8 x i8> %a1 to <8 x i16>

diff --git a/llvm/test/CodeGen/AArch64/avg.ll b/llvm/test/CodeGen/AArch64/avg.ll
new file mode 100644
index 0000000000000..70cc360f4ae57
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/avg.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+define <16 x i16> @zext_avgflooru(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: zext_avgflooru:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    uhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    uhadd v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i8> %a0 to <16 x i16>
+  %x1 = zext <16 x i8> %a1 to <16 x i16>
+  %and = and <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = add <16 x i16> %and, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @zext_avgflooru_negative(<16 x i8> %a0, <16 x i4> %a1) {
+; CHECK-LABEL: zext_avgflooru_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.16b, #15
+; CHECK-NEXT:    ext v3.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    and v1.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    uhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    uhadd v1.8b, v3.8b, v2.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i8> %a0 to <16 x i16>
+  %x1 = zext <16 x i4> %a1 to <16 x i16>
+  %and = and <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = add <16 x i16> %and, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @zext_avgceilu(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: zext_avgceilu:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    urhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    urhadd v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i8> %a0 to <16 x i16>
+  %x1 = zext <16 x i8> %a1 to <16 x i16>
+  %or = or <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = sub <16 x i16> %or, %shift
+  ret <16 x i16> %avg
+}
+
+define <16 x i16> @zext_avgceilu_negative(<16 x i4> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: zext_avgceilu_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.16b, #15
+; CHECK-NEXT:    ext v3.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    urhadd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    urhadd v1.8b, v2.8b, v3.8b
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ret
+  %x0 = zext <16 x i4> %a0 to <16 x i16>
+  %x1 = zext <16 x i8> %a1 to <16 x i16>
+  %or = or <16 x i16> %x0, %x1
+  %xor = xor <16 x i16> %x0, %x1
+  %shift = lshr <16 x i16> %xor, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %avg = sub <16 x i16> %or, %shift
+  ret <16 x i16> %avg
+}

