[llvm] [DAG] ComputeNumSignBits - add AVGCEILS/AVGFLOORS handling (PR #93021)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed May 22 04:42:03 PDT 2024


RKSimon (https://github.com/RKSimon) created pull request https://github.com/llvm/llvm-project/pull/93021

Pulled from #92096

>From 935538a4901764f6032c42f185252ee1b696db45 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 22 May 2024 12:37:59 +0100
Subject: [PATCH 1/2] [AArch64] Add tests showing failure to calculate
 AVGCEILS/AVGFLOORS sign bits

---
 llvm/test/CodeGen/AArch64/hadd-combine.ll | 71 ++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index c0f76784eb375..c8972c8d814f7 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -955,6 +955,75 @@ define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
   ret <8 x i16> %r0
 }
 
+; TODO - unnecessary sign_extend_inreg after shadd
+define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: shadd_signbits_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
+; CHECK-NEXT:    shadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT:    shl v0.2s, v1.2s, #17
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    ret
+  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+  %m = and <2 x i32> %x0, %x1
+  %s = xor <2 x i32> %x0, %x1
+  %x = ashr <2 x i32> %s, <i32 1, i32 1>
+  %avg = add <2 x i32> %m, %x
+  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
+  store <2 x i32> %avg, ptr %p2 ; extra use
+  ret <2 x i32> %avg2
+}
+
+; TODO - unnecessary sign_extend_inreg after srhadd
+define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
+; CHECK-NEXT:    srhadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT:    shl v0.2s, v1.2s, #17
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    ret
+  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+  %m = or <2 x i32> %x0, %x1
+  %s = xor <2 x i32> %x0, %x1
+  %x = ashr <2 x i32> %s, <i32 1, i32 1>
+  %avg = sub <2 x i32> %m, %x
+  %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+  %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
+  store <2 x i32> %avg, ptr %p2 ; extra use
+  ret <2 x i32> %avg2
+}
+
+; negative test - not enough signbits to remove sign_extend_inreg after srhadd
+define <2 x i32> @srhadd_signbits_v2i32_negative(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
+; CHECK-NEXT:    srhadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT:    shl v0.2s, v1.2s, #22
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #22
+; CHECK-NEXT:    ret
+  %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+  %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+  %m = or <2 x i32> %x0, %x1
+  %s = xor <2 x i32> %x0, %x1
+  %x = ashr <2 x i32> %s, <i32 1, i32 1>
+  %avg = sub <2 x i32> %m, %x
+  %avg1 = shl <2 x i32> %avg, <i32 22, i32 22>
+  %avg2 = ashr <2 x i32> %avg1, <i32 22, i32 22>
+  store <2 x i32> %avg, ptr %p2 ; extra use
+  ret <2 x i32> %avg2
+}
+
 declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
 declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
 declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
@@ -979,4 +1048,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
 declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
 declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
 declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file
+declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)

>From 9c403aa8acf7be3323d5baf827eb571fc12835a3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 22 May 2024 12:40:19 +0100
Subject: [PATCH 2/2] [DAG] ComputeNumSignBits - add AVGCEILS/AVGFLOORS
 handling

Pulled from #92096
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  7 +++++++
 llvm/test/CodeGen/AArch64/hadd-combine.ll      | 16 ++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 777bbf071732e..b05649c6ce955 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4780,6 +4780,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
         (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
     return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
   }
+  case ISD::AVGCEILS:
+  case ISD::AVGFLOORS:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    if (Tmp == 1)
+      return 1; // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    return std::min(Tmp, Tmp2);
   case ISD::SREM:
     // The sign bit is the LHS's sign bit, except when the result of the
     // remainder is zero. The magnitude of the result should be less than or
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index c8972c8d814f7..28f454767c121 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -955,16 +955,14 @@ define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
   ret <8 x i16> %r0
 }
 
-; TODO - unnecessary sign_extend_inreg after shadd
+; Remove unnecessary sign_extend_inreg after shadd
 define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
 ; CHECK-LABEL: shadd_signbits_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
 ; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
-; CHECK-NEXT:    shadd v1.2s, v0.2s, v1.2s
-; CHECK-NEXT:    shl v0.2s, v1.2s, #17
-; CHECK-NEXT:    str d1, [x0]
-; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    shadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
   %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
   %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
@@ -978,16 +976,14 @@ define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
   ret <2 x i32> %avg2
 }
 
-; TODO - unnecessary sign_extend_inreg after srhadd
+; Remove unnecessary sign_extend_inreg after srhadd
 define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
 ; CHECK-LABEL: srhadd_signbits_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
 ; CHECK-NEXT:    sshr v1.2s, v1.2s, #17
-; CHECK-NEXT:    srhadd v1.2s, v0.2s, v1.2s
-; CHECK-NEXT:    shl v0.2s, v1.2s, #17
-; CHECK-NEXT:    str d1, [x0]
-; CHECK-NEXT:    sshr v0.2s, v0.2s, #17
+; CHECK-NEXT:    srhadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
   %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
   %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>



More information about the llvm-commits mailing list