[llvm] [DAG] ComputeNumSignBits - add AVGCEILS/AVGFLOORS handling (PR #93021)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 04:42:34 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Simon Pilgrim (RKSimon)
Changes
Pulled from #92096
---
Full diff: https://github.com/llvm/llvm-project/pull/93021.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (+7)
- (modified) llvm/test/CodeGen/AArch64/hadd-combine.ll (+66-1)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 777bbf071732e..b05649c6ce955 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4780,6 +4780,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
(VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
}
+ case ISD::AVGCEILS:
+ case ISD::AVGFLOORS:
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ if (Tmp == 1)
+ return 1; // Early out.
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ return std::min(Tmp, Tmp2);
case ISD::SREM:
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero. The magnitude of the result should be less than or
diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll
index c0f76784eb375..28f454767c121 100644
--- a/llvm/test/CodeGen/AArch64/hadd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll
@@ -955,6 +955,71 @@ define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
ret <8 x i16> %r0
}
+; Remove unnecessary sign_extend_inreg after shadd
+define <2 x i32> @shadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: shadd_signbits_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: sshr v1.2s, v1.2s, #17
+; CHECK-NEXT: shadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+ %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+ %m = and <2 x i32> %x0, %x1
+ %s = xor <2 x i32> %x0, %x1
+ %x = ashr <2 x i32> %s, <i32 1, i32 1>
+ %avg = add <2 x i32> %m, %x
+ %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+ %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
+ store <2 x i32> %avg, ptr %p2 ; extra use
+ ret <2 x i32> %avg2
+}
+
+; Remove unnecessary sign_extend_inreg after srhadd
+define <2 x i32> @srhadd_signbits_v2i32(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: sshr v1.2s, v1.2s, #17
+; CHECK-NEXT: srhadd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+ %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+ %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+ %m = or <2 x i32> %x0, %x1
+ %s = xor <2 x i32> %x0, %x1
+ %x = ashr <2 x i32> %s, <i32 1, i32 1>
+ %avg = sub <2 x i32> %m, %x
+ %avg1 = shl <2 x i32> %avg, <i32 17, i32 17>
+ %avg2 = ashr <2 x i32> %avg1, <i32 17, i32 17>
+ store <2 x i32> %avg, ptr %p2 ; extra use
+ ret <2 x i32> %avg2
+}
+
+; negative test - not enough signbits to remove sign_extend_inreg after srhadd
+define <2 x i32> @srhadd_signbits_v2i32_negative(<2 x i32> %a0, <2 x i32> %a1, ptr %p2) {
+; CHECK-LABEL: srhadd_signbits_v2i32_negative:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshr v0.2s, v0.2s, #17
+; CHECK-NEXT: sshr v1.2s, v1.2s, #17
+; CHECK-NEXT: srhadd v1.2s, v0.2s, v1.2s
+; CHECK-NEXT: shl v0.2s, v1.2s, #22
+; CHECK-NEXT: str d1, [x0]
+; CHECK-NEXT: sshr v0.2s, v0.2s, #22
+; CHECK-NEXT: ret
+ %x0 = ashr <2 x i32> %a0, <i32 17, i32 17>
+ %x1 = ashr <2 x i32> %a1, <i32 17, i32 17>
+ %m = or <2 x i32> %x0, %x1
+ %s = xor <2 x i32> %x0, %x1
+ %x = ashr <2 x i32> %s, <i32 1, i32 1>
+ %avg = sub <2 x i32> %m, %x
+ %avg1 = shl <2 x i32> %avg, <i32 22, i32 22>
+ %avg2 = ashr <2 x i32> %avg1, <i32 22, i32 22>
+ store <2 x i32> %avg, ptr %p2 ; extra use
+ ret <2 x i32> %avg2
+}
+
declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>)
declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>)
declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>)
@@ -979,4 +1044,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
\ No newline at end of file
+declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>)
``````````
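For context on the new `ISD::AVGCEILS`/`ISD::AVGFLOORS` case: a signed average of two values that each have at least N sign bits itself has at least N sign bits, since both the floor and ceiling average stay inside the operands' value range. That is why `std::min(Tmp, Tmp2)` is a safe lower bound. A minimal standalone sketch (not part of the patch; `numSignBits` is a hypothetical helper mirroring what `ComputeNumSignBits` reports, and arithmetic right shift is assumed for negative ints, matching the AVG node semantics) that checks this exhaustively over `int8_t`:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Count sign bits of an 8-bit value the way ComputeNumSignBits does:
// the number of leading bits equal to the sign bit, inclusive (1..8).
static unsigned numSignBits(int8_t V) {
  uint8_t U = static_cast<uint8_t>(V);
  unsigned Sign = (U >> 7) & 1;
  unsigned N = 1;
  while (N < 8 && ((U >> (7 - N)) & 1) == Sign)
    ++N;
  return N;
}

int main() {
  for (int A = -128; A <= 127; ++A) {
    for (int B = -128; B <= 127; ++B) {
      // AVGFLOORS / AVGCEILS semantics, assuming arithmetic right shift.
      int8_t Floor = static_cast<int8_t>((A + B) >> 1);
      int8_t Ceil = static_cast<int8_t>((A + B + 1) >> 1);
      unsigned Bound = std::min(numSignBits(static_cast<int8_t>(A)),
                                numSignBits(static_cast<int8_t>(B)));
      assert(numSignBits(Floor) >= Bound);
      assert(numSignBits(Ceil) >= Bound);
    }
  }
  std::puts("min(sign bits) lower bound holds for all int8_t pairs");
}
```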
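Similarly, the IR in the new tests builds the averages from the bitwise expansion patterns that the AArch64 backend matches to `shadd`/`srhadd`. A quick hypothetical reference check of those identities (again assuming arithmetic right shift for negative ints; not part of the patch):

```cpp
#include <cassert>
#include <cstdio>

int main() {
  for (int A = -128; A <= 127; ++A) {
    for (int B = -128; B <= 127; ++B) {
      // shadd pattern from the tests:
      //   (a & b) + ((a ^ b) ashr 1) == floor((a + b) / 2)
      int HAdd = (A & B) + ((A ^ B) >> 1);
      assert(HAdd == ((A + B) >> 1));
      // srhadd pattern from the tests:
      //   (a | b) - ((a ^ b) ashr 1) == ceil((a + b) / 2)
      int RHAdd = (A | B) - ((A ^ B) >> 1);
      assert(RHAdd == ((A + B + 1) >> 1));
    }
  }
  std::puts("shadd/srhadd expansion identities hold for all int8_t pairs");
}
```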
https://github.com/llvm/llvm-project/pull/93021
More information about the llvm-commits mailing list