[llvm] [DAG] computeKnownBits - abds(x, y) will be zero in the upper bits if x and y are sign-extended (PR #94448)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 5 02:52:00 PDT 2024


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/94448

>From e69db9de4744cba33f9a58427c0c83b4ee3a4fe8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 5 Jun 2024 10:16:19 +0100
Subject: [PATCH 1/2] [AArch64] neon-abd.ll - add ABDS test coverage for #94442

---
 llvm/test/CodeGen/AArch64/neon-abd.ll | 42 +++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 901cb8adc23f0..4862c8dde7d10 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -554,6 +554,48 @@ define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
   ret <16 x i8> %sub
 }
 
+; TODO: (abds x, y) upper bits are known zero if x and y have extra sign bits
+define <4 x i16> @combine_sabd_4h_zerosign(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK-LABEL: combine_sabd_4h_zerosign:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI41_0
+; CHECK-NEXT:    adrp x9, .LCPI41_1
+; CHECK-NEXT:    ldr d2, [x8, :lo12:.LCPI41_0]
+; CHECK-NEXT:    ldr d3, [x9, :lo12:.LCPI41_1]
+; CHECK-NEXT:    sshl v0.4h, v0.4h, v2.4h
+; CHECK-NEXT:    sshl v1.4h, v1.4h, v3.4h
+; CHECK-NEXT:    movi v2.4h, #128, lsl #8
+; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    ret
+  %a.ext = ashr <4 x i16> %a, <i16 7, i16 8, i16 9, i16 10>
+  %b.ext = ashr <4 x i16> %b, <i16 11, i16 12, i16 13, i16 14>
+  %max = tail call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext)
+  %min = tail call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a.ext, <4 x i16> %b.ext)
+  %sub = sub <4 x i16> %max, %min
+  %mask = and <4 x i16> %sub, <i16 32768, i16 32768, i16 32768, i16 32768>
+  ret <4 x i16> %mask
+}
+
+; negative test - mask extends beyond known zero bits
+define <2 x i32> @combine_sabd_2s_zerosign_negative(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: combine_sabd_2s_zerosign_negative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #3
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #15
+; CHECK-NEXT:    mvni v2.2s, #7, msl #16
+; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    ret
+  %a.ext = ashr <2 x i32> %a, <i32 3, i32 3>
+  %b.ext = ashr <2 x i32> %b, <i32 15, i32 15>
+  %max = tail call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext)
+  %min = tail call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a.ext, <2 x i32> %b.ext)
+  %sub = sub <2 x i32> %max, %min
+  %mask = and <2 x i32> %sub, <i32 -524288, i32 -524288> ; 0xFFF80000
+  ret <2 x i32> %mask
+}
+
 declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
 declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
 

>From f6add386ad56dac324dcb79a43e8336e47278a9f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 5 Jun 2024 10:19:42 +0100
Subject: [PATCH 2/2] [DAG] computeKnownBits - abds(x, y) will be zero in the
 upper bits if x and y are sign-extended

As reported on #94442 - if x and y each have more than one sign bit, then the upper bits of their signed absolute difference are guaranteed to be zero: specifically, the top (min(NumSignBits(x), NumSignBits(y)) - 1) bits.

Alive2: https://alive2.llvm.org/ce/z/7_z2Vc
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  7 +++++++
 llvm/test/CodeGen/AArch64/neon-abd.ll          | 12 ++----------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 414c724b94f7b..7d87b4fb6b17a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3477,6 +3477,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
     Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
     Known = KnownBits::abds(Known, Known2);
+    unsigned SignBits1 =
+        ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
+    if (SignBits1 == 1)
+      break;
+    unsigned SignBits0 =
+        ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+    Known.Zero.setHighBits(std::min(SignBits0, SignBits1) - 1);
     break;
   }
   case ISD::UMUL_LOHI: {
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index 4862c8dde7d10..f743bae84053d 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -554,19 +554,11 @@ define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
   ret <16 x i8> %sub
 }
 
-; TODO: (abds x, y) upper bits are known zero if x and y have extra sign bits
+; (abds x, y) upper bits are known zero if x and y have extra sign bits
 define <4 x i16> @combine_sabd_4h_zerosign(<4 x i16> %a, <4 x i16> %b) #0 {
 ; CHECK-LABEL: combine_sabd_4h_zerosign:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI41_0
-; CHECK-NEXT:    adrp x9, .LCPI41_1
-; CHECK-NEXT:    ldr d2, [x8, :lo12:.LCPI41_0]
-; CHECK-NEXT:    ldr d3, [x9, :lo12:.LCPI41_1]
-; CHECK-NEXT:    sshl v0.4h, v0.4h, v2.4h
-; CHECK-NEXT:    sshl v1.4h, v1.4h, v3.4h
-; CHECK-NEXT:    movi v2.4h, #128, lsl #8
-; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
-; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    ret
   %a.ext = ashr <4 x i16> %a, <i16 7, i16 8, i16 9, i16 10>
   %b.ext = ashr <4 x i16> %b, <i16 11, i16 12, i16 13, i16 14>



More information about the llvm-commits mailing list