[llvm] convert avgfloors -> avgflooru if eligible (PR #85583)

Sun Mar 17 14:02:10 PDT 2024

https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/85583

None

>From 142558998a9f9d0a4323493a5b2c0dac0bc74330 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 17 Mar 2024 16:32:46 -0400
Subject: [PATCH] convert avgfloors -> avgflooru if eligible

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp      | 14 ++++++++++++++
 .../CodeGen/AArch64/aarch64-known-bits-hadd.ll     |  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5eb53d57c9c2bf..f4fdec7db4a57a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5060,6 +5060,20 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
 
+  // fold (avgfloors x, y) -> (avgflooru x, y) iff both args are known
+  // non-negative and their sum does not overflow
+  if (Opcode == ISD::AVGFLOORS && hasOperation(ISD::AVGFLOORU, VT) &&
+      DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1) &&
+      DAG.willNotOverflowAdd(true, N0, N1))
+    return DAG.getNode(ISD::AVGFLOORU, DL, VT, N0, N1);
+
+  // fold (avgceils x, y) -> (avgceilu x, y) iff both args are known
+  // non-negative and their sum does not overflow
+  if (Opcode == ISD::AVGCEILS && hasOperation(ISD::AVGCEILU, VT) &&
+      DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1) &&
+      DAG.willNotOverflowAdd(true, N0, N1))
+    return DAG.getNode(ISD::AVGCEILU, DL, VT, N0, N1);
+
   if (VT.isVector()) {
     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
       return FoldedVOp;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index 017f382774892c..6fc87af5260b04 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -41,7 +41,7 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    shadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uhadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>
@@ -56,7 +56,7 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
-; CHECK-NEXT:    srhadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    urhadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    bic v0.8h, #254, lsl #8
 ; CHECK-NEXT:    ret
   %x0 = zext <8 x i8> %a0 to <8 x i16>