[llvm] [DAG] ComputeNumSignBits - add AVGCEILS/AVGFLOORS handling (PR #93021)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 04:42:03 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/93021
Pulled from #92096
From 935538a4901764f6032c42f185252ee1b696db45 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 22 May 2024 12:37:59 +0100
Subject: [PATCH 1/2] [AArch64] Add tests showing failure to calculate
AVGCEILS/AVGFLOORS sign bits
---
llvm/test/CodeGen/AArch64/hadd-combine.ll | 71 ++++++++++++++++++++++-
1 file changed, 70 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index c0f76784eb375..c8972c8d814f7 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -955,6 +955,75 @@ define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %r0
}
+; TODO - unnecessary sign_extend_inreg after shadd
+define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: shadd_signbits_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: sshr v1.2s, v1.2s, #17
+; CHECK-NEXT: shadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT: shl v0.2s, v1.2s, #17
+; CHECK-NEXT: str d1, [x0]
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: ret
+ %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+ %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+ %m = and <2 x i32> %x0, %x1
+ %s = xor <2 x i32> %x0, %x1
+ %x = ashr <2 x i32> %s, <i32 1, i32 1>
+ %avg = add <2 x i32> %m, %x
+ %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+ %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
+ store <2 x i32> %avg, ptr %p2 ; extra use
+ ret <2 x i32> %avg2
+}
+
+; TODO - unnecessary sign_extend_inreg after srhadd
+define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: sshr v1.2s, v1.2s, #17
+; CHECK-NEXT: srhadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT: shl v0.2s, v1.2s, #17
+; CHECK-NEXT: str d1, [x0]
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: ret
+ %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+ %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+ %m = or <2 x i32> %x0, %x1
+ %s = xor <2 x i32> %x0, %x1
+ %x = ashr <2 x i32> %s, <i32 1, i32 1>
+ %avg = sub <2 x i32> %m, %x
+ %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+ %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
+ store <2 x i32> %avg, ptr %p2 ; extra use
+ ret <2 x i32> %avg2
+}
+
+; negative test - not enough signbits to remove sign_extend_inreg after srhadd
+define <2 x i32> @srhadd_signbits_v2i32_negative(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32_negative:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: sshr v1.2s, v1.2s, #17
+; CHECK-NEXT: srhadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT: shl v0.2s, v1.2s, #22
+; CHECK-NEXT: str d1, [x0]
+; CHECK-NEXT: sshr v0.2s, v0.2s, #22
+; CHECK-NEXT: ret
+ %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+ %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+ %m = or <2 x i32> %x0, %x1
+ %s = xor <2 x i32> %x0, %x1
+ %x = ashr <2 x i32> %s, <i32 1, i32 1>
+ %avg = sub <2 x i32> %m, %x
+ %avg1 = shl <2 x i32> %avg, <i32 22, i32 22>
+ %avg2 = ashr <2 x i32> %avg1, <i32 22, i32 22>
+ store <2 x i32> %avg, ptr %p2 ; extra use
+ ret <2 x i32> %avg2
+}
+
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
@@ -979,4 +1048,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file
+declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
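For context (not part of the patch), the new tests spell out the halving-add patterns by hand so that DAGCombine can form the AVGFLOORS/AVGCEILS nodes. Here (x & y) + ((x ^ y) >> 1) is the overflow-free floor average that lowers to shadd, and (x | y) - ((x ^ y) >> 1) is the rounding average that lowers to srhadd. A minimal standalone C++ sanity check of those identities (illustrative only, not LLVM code) could look like this:

#include <cassert>

int main() {
  // Check the two averaging identities used by the new tests over all pairs
  // of 8-bit signed values (small enough to verify exhaustively). Note that
  // >> on a negative value is an arithmetic shift on mainstream compilers
  // (and guaranteed to be since C++20).
  for (int A = -128; A <= 127; ++A) {
    for (int B = -128; B <= 127; ++B) {
      // Floor average without intermediate overflow: (a & b) + ((a ^ b) >> 1).
      int FloorAvg = (A & B) + ((A ^ B) >> 1);
      // Rounding (ceiling) average: (a | b) - ((a ^ b) >> 1).
      int CeilAvg = (A | B) - ((A ^ B) >> 1);
      assert(FloorAvg == ((A + B) >> 1));    // floor((a + b) / 2)
      assert(CeilAvg == ((A + B + 1) >> 1)); // ceil((a + b) / 2)
    }
  }
  return 0;
}

The shift amounts in the tests line up with the same arithmetic: after an ashr by 17 each operand has at least 18 sign bits, exactly enough to make the trailing shl/ashr by 17 (a sign_extend_inreg from i15) redundant, whereas the negative test's shl/ashr by 22 would need 23 known sign bits and has to stay.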
From 9c403aa8acf7be3323d5baf827eb571fc12835a3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 22 May 2024 12:40:19 +0100
Subject: [PATCH 2/2] [DAG] ComputeNumSignBits - add AVGCEILS/AVGFLOORS
handling
Pulled from #92096
---
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 +++++++
llvm/test/CodeGen/AArch64/hadd-combine.ll | 16 ++++++----------
2 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 777bbf071732e..b05649c6ce955 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4780,6 +4780,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
(VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
}
+ case ISD::AVGCEILS:
+ case ISD::AVGFLOORS:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ return std::min(Tmp, Tmp2);
case ISD::SREM:
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero. The magnitude of the result should be less than or
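As a reviewer note (not part of the patch), the justification for returning std::min(Tmp, Tmp2) here is that a signed floor or ceiling average always lies between its two operands, so it cannot have fewer known sign bits than the operand with fewer sign bits. A standalone brute-force check of that property for i8, purely illustrative (numSignBits8 is a hypothetical helper, not an LLVM API), might look like this:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Number of known sign bits of an 8-bit value: the count of copies of the
// sign bit at the top of the value, including the sign bit itself.
static int numSignBits8(int8_t X) {
  int N = 1;
  for (int Bit = 6; Bit >= 0 && ((X >> Bit) & 1) == ((X >> 7) & 1); --Bit)
    ++N;
  return N;
}

int main() {
  for (int A = -128; A <= 127; ++A) {
    for (int B = -128; B <= 127; ++B) {
      int Floor = (A + B) >> 1;    // AVGFLOORS semantics for i8
      int Ceil = (A + B + 1) >> 1; // AVGCEILS semantics for i8
      int MinSign = std::min(numSignBits8((int8_t)A), numSignBits8((int8_t)B));
      // The average lies between A and B, so it keeps at least MinSign bits.
      assert(numSignBits8((int8_t)Floor) >= MinSign);
      assert(numSignBits8((int8_t)Ceil) >= MinSign);
    }
  }
  return 0;
}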
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index c8972c8d814f7..28f454767c121 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -955,16 +955,14 @@ define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %r0
}
-; TODO - unnecessary sign_extend_inreg after shadd
+; Remove unnecessary sign_extend_inreg after shadd
define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: shadd_signbits_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sshr v0.2s, v0.2s, #17
; CHECK-NEXT: sshr v1.2s, v1.2s, #17
-; CHECK-NEXT: shadd v1.2s, v0.2s, v1.2s
-; CHECK-NEXT: shl v0.2s, v1.2s, #17
-; CHECK-NEXT: str d1, [x0]
-; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: shadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
%x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
@@ -978,16 +976,14 @@ define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
ret <2 x i32> %avg2
}
-; TODO - unnecessary sign_extend_inreg after srhadd
+; Remove unnecessary sign_extend_inreg after srhadd
define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
; CHECK-LABEL: srhadd_signbits_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sshr v0.2s, v0.2s, #17
; CHECK-NEXT: sshr v1.2s, v1.2s, #17
-; CHECK-NEXT: srhadd v1.2s, v0.2s, v1.2s
-; CHECK-NEXT: shl v0.2s, v1.2s, #17
-; CHECK-NEXT: str d1, [x0]
-; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: srhadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
%x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
%x1 = ashr <2 x i32> %a1, <i32 17, i32 17>