[llvm] [KnownBits] Make abdu and abds optimal (PR #89081)

Thu Apr 18 02:06:27 PDT 2024

https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/89081

>From 8704b02f3ffedd18d29a3f078c3de0b857d7a9d4 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 17 Apr 2024 15:35:17 +0100
Subject: [PATCH 1/3] [KnownBits] Make abdu and abds optimal

---
 llvm/lib/Support/KnownBits.cpp           | 51 ++++++++++-----------
 llvm/unittests/Support/KnownBitsTest.cpp | 56 +-----------------------
 2 files changed, 24 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index d72355dab6f1d3..321842082c98e3 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -232,41 +232,34 @@ KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) {
 }
 
 KnownBits KnownBits::abdu(const KnownBits &LHS, const KnownBits &RHS) {
-  // abdu(LHS,RHS) = sub(umax(LHS,RHS), umin(LHS,RHS)).
-  KnownBits UMaxValue = umax(LHS, RHS);
-  KnownBits UMinValue = umin(LHS, RHS);
-  KnownBits MinMaxDiff = computeForAddSub(/*Add=*/false, /*NSW=*/false,
-                                          /*NUW=*/true, UMaxValue, UMinValue);
+  // If we know which argument is larger, return (sub LHS, RHS) or
+  // (sub RHS, LHS) directly.
+  if (LHS.getMinValue().uge(RHS.getMaxValue()))
+    return computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, LHS,
+                            RHS);
+  if (RHS.getMinValue().uge(LHS.getMaxValue()))
+    return computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, RHS,
+                            LHS);
 
-  // find the common bits between sub(LHS,RHS) and sub(RHS,LHS).
+  // Find the common bits between (sub nuw LHS, RHS) and (sub nuw RHS, LHS).
   KnownBits Diff0 =
-      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, LHS, RHS);
+      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/true, LHS, RHS);
   KnownBits Diff1 =
-      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, RHS, LHS);
-  KnownBits SubDiff = Diff0.intersectWith(Diff1);
-
-  KnownBits KnownAbsDiff = MinMaxDiff.unionWith(SubDiff);
-  assert(!KnownAbsDiff.hasConflict() && "Bad Output");
-  return KnownAbsDiff;
+      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/true, RHS, LHS);
+  return Diff0.intersectWith(Diff1);
 }
 
 KnownBits KnownBits::abds(const KnownBits &LHS, const KnownBits &RHS) {
-  // abds(LHS,RHS) = sub(smax(LHS,RHS), smin(LHS,RHS)).
-  KnownBits SMaxValue = smax(LHS, RHS);
-  KnownBits SMinValue = smin(LHS, RHS);
-  KnownBits MinMaxDiff = computeForAddSub(/*Add=*/false, /*NSW=*/false,
-                                          /*NUW=*/false, SMaxValue, SMinValue);
-
-  // find the common bits between sub(LHS,RHS) and sub(RHS,LHS).
-  KnownBits Diff0 =
-      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, LHS, RHS);
-  KnownBits Diff1 =
-      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, RHS, LHS);
-  KnownBits SubDiff = Diff0.intersectWith(Diff1);
-
-  KnownBits KnownAbsDiff = MinMaxDiff.unionWith(SubDiff);
-  assert(!KnownAbsDiff.hasConflict() && "Bad Output");
-  return KnownAbsDiff;
+  // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF]
+  auto Flip = [](const KnownBits &Val) {
+    unsigned SignBitPosition = Val.getBitWidth() - 1;
+    APInt Zero = Val.Zero;
+    APInt One = Val.One;
+    Zero.setBitVal(SignBitPosition, Val.One[SignBitPosition]);
+    One.setBitVal(SignBitPosition, Val.Zero[SignBitPosition]);
+    return KnownBits(Zero, One);
+  };
+  return abdu(Flip(LHS), Flip(RHS));
 }
 
 static unsigned getMaxShiftAmount(const APInt &MaxValue, unsigned BitWidth) {
diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp
index 027d6379af26b0..9a744271bc93ac 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -294,58 +294,6 @@ TEST(KnownBitsTest, SignBitUnknown) {
   EXPECT_TRUE(Known.isSignUnknown());
 }
 
-TEST(KnownBitsTest, ABDUSpecialCase) {
-  // There are 2 implementations of abdu - both are currently needed to cover
-  // extra cases.
-  KnownBits LHS, RHS, Res;
-
-  // abdu(LHS,RHS) = sub(umax(LHS,RHS), umin(LHS,RHS)).
-  // Actual: false (Inputs = 1011, 101?, Computed = 000?, Exact = 000?)
-  LHS.One = APInt(4, 0b1011);
-  RHS.One = APInt(4, 0b1010);
-  LHS.Zero = APInt(4, 0b0100);
-  RHS.Zero = APInt(4, 0b0100);
-  Res = KnownBits::abdu(LHS, RHS);
-  EXPECT_EQ(0b0000ul, Res.One.getZExtValue());
-  EXPECT_EQ(0b1110ul, Res.Zero.getZExtValue());
-
-  // find the common bits between sub(LHS,RHS) and sub(RHS,LHS).
-  // Actual: false (Inputs = ???1, 1000, Computed = ???1, Exact = 0??1)
-  LHS.One = APInt(4, 0b0001);
-  RHS.One = APInt(4, 0b1000);
-  LHS.Zero = APInt(4, 0b0000);
-  RHS.Zero = APInt(4, 0b0111);
-  Res = KnownBits::abdu(LHS, RHS);
-  EXPECT_EQ(0b0001ul, Res.One.getZExtValue());
-  EXPECT_EQ(0b0000ul, Res.Zero.getZExtValue());
-}
-
-TEST(KnownBitsTest, ABDSSpecialCase) {
-  // There are 2 implementations of abds - both are currently needed to cover
-  // extra cases.
-  KnownBits LHS, RHS, Res;
-
-  // abds(LHS,RHS) = sub(smax(LHS,RHS), smin(LHS,RHS)).
-  // Actual: false (Inputs = 1011, 10??, Computed = ????, Exact = 00??)
-  LHS.One = APInt(4, 0b1011);
-  RHS.One = APInt(4, 0b1000);
-  LHS.Zero = APInt(4, 0b0100);
-  RHS.Zero = APInt(4, 0b0100);
-  Res = KnownBits::abds(LHS, RHS);
-  EXPECT_EQ(0, Res.One.getSExtValue());
-  EXPECT_EQ(-4, Res.Zero.getSExtValue());
-
-  // find the common bits between sub(LHS,RHS) and sub(RHS,LHS).
-  // Actual: false (Inputs = ???1, 1000, Computed = ???1, Exact = 0??1)
-  LHS.One = APInt(4, 0b0001);
-  RHS.One = APInt(4, 0b1000);
-  LHS.Zero = APInt(4, 0b0000);
-  RHS.Zero = APInt(4, 0b0111);
-  Res = KnownBits::abds(LHS, RHS);
-  EXPECT_EQ(1, Res.One.getSExtValue());
-  EXPECT_EQ(0, Res.Zero.getSExtValue());
-}
-
 TEST(KnownBitsTest, BinaryExhaustive) {
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
@@ -367,9 +315,9 @@ TEST(KnownBitsTest, BinaryExhaustive) {
   testBinaryOpExhaustive(KnownBits::smax, APIntOps::smax);
   testBinaryOpExhaustive(KnownBits::smin, APIntOps::smin);
   testBinaryOpExhaustive(KnownBits::abdu, APIntOps::abdu,
-                         checkCorrectnessOnlyBinary);
+                         checkOptimalityBinary);
   testBinaryOpExhaustive(KnownBits::abds, APIntOps::abds,
-                         checkCorrectnessOnlyBinary);
+                         checkOptimalityBinary);
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
         return KnownBits::udiv(Known1, Known2);

>From 7b8b09751842c15d002aa04db374f37484ebebe1 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 18 Apr 2024 10:03:39 +0100
Subject: [PATCH 2/3] Refactor and add comments

---
 llvm/include/llvm/Support/KnownBits.h |  2 +-
 llvm/lib/Support/KnownBits.cpp        | 41 ++++++++++++++++++++-------
 2 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 73cb01e0644a8d..9b7f405b625642 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -394,7 +394,7 @@ struct KnownBits {
   static KnownBits abdu(const KnownBits &LHS, const KnownBits &RHS);
 
   /// Compute known bits for abds(LHS, RHS).
-  static KnownBits abds(const KnownBits &LHS, const KnownBits &RHS);
+  static KnownBits abds(KnownBits LHS, KnownBits RHS);
 
   /// Compute known bits for shl(LHS, RHS).
   /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 321842082c98e3..fe47884f3e55ac 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -241,6 +241,7 @@ KnownBits KnownBits::abdu(const KnownBits &LHS, const KnownBits &RHS) {
     return computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, RHS,
                             LHS);
 
+  // By construction, the subtraction in abdu never has unsigned overflow.
   // Find the common bits between (sub nuw LHS, RHS) and (sub nuw RHS, LHS).
   KnownBits Diff0 =
       computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/true, LHS, RHS);
@@ -249,17 +250,35 @@ KnownBits KnownBits::abdu(const KnownBits &LHS, const KnownBits &RHS) {
   return Diff0.intersectWith(Diff1);
 }
 
-KnownBits KnownBits::abds(const KnownBits &LHS, const KnownBits &RHS) {
-  // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF]
-  auto Flip = [](const KnownBits &Val) {
-    unsigned SignBitPosition = Val.getBitWidth() - 1;
-    APInt Zero = Val.Zero;
-    APInt One = Val.One;
-    Zero.setBitVal(SignBitPosition, Val.One[SignBitPosition]);
-    One.setBitVal(SignBitPosition, Val.Zero[SignBitPosition]);
-    return KnownBits(Zero, One);
-  };
-  return abdu(Flip(LHS), Flip(RHS));
+KnownBits KnownBits::abds(KnownBits LHS, KnownBits RHS) {
+  // If we know which argument is larger, return (sub LHS, RHS) or
+  // (sub RHS, LHS) directly.
+  if (LHS.getSignedMinValue().sge(RHS.getSignedMaxValue()))
+    return computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, LHS,
+                            RHS);
+  if (RHS.getSignedMinValue().sge(LHS.getSignedMaxValue()))
+    return computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/false, RHS,
+                            LHS);
+
+  // Shift both arguments from the signed range to the unsigned range, e.g. from
+  // [-0x80, 0x7F] to [0, 0xFF]. This allows us to use "sub nuw" below just like
+  // abdu does.
+  // Note that we can't just use "sub nsw" instead because abds has signed
+  // inputs but an unsigned result, which makes the overflow conditions
+  // different.
+  unsigned SignBitPosition = LHS.getBitWidth() - 1;
+  for (auto Arg : {&LHS, &RHS}) {
+    bool Tmp = Arg->Zero[SignBitPosition];
+    Arg->Zero.setBitVal(SignBitPosition, Arg->One[SignBitPosition]);
+    Arg->One.setBitVal(SignBitPosition, Tmp);
+  }
+
+  // Find the common bits between (sub nuw LHS, RHS) and (sub nuw RHS, LHS).
+  KnownBits Diff0 =
+      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/true, LHS, RHS);
+  KnownBits Diff1 =
+      computeForAddSub(/*Add=*/false, /*NSW=*/false, /*NUW=*/true, RHS, LHS);
+  return Diff0.intersectWith(Diff1);
 }
 
 static unsigned getMaxShiftAmount(const APInt &MaxValue, unsigned BitWidth) {

>From 89907285ab28fd2604ef83803c16e859cc44d8f9 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Thu, 18 Apr 2024 10:06:15 +0100
Subject: [PATCH 3/3] Remove default argument

---
 llvm/unittests/Support/KnownBitsTest.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp
index 9a744271bc93ac..8049517acc9fa0 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -314,10 +314,8 @@ TEST(KnownBitsTest, BinaryExhaustive) {
   testBinaryOpExhaustive(KnownBits::umin, APIntOps::umin);
   testBinaryOpExhaustive(KnownBits::smax, APIntOps::smax);
   testBinaryOpExhaustive(KnownBits::smin, APIntOps::smin);
-  testBinaryOpExhaustive(KnownBits::abdu, APIntOps::abdu,
-                         checkOptimalityBinary);
-  testBinaryOpExhaustive(KnownBits::abds, APIntOps::abds,
-                         checkOptimalityBinary);
+  testBinaryOpExhaustive(KnownBits::abdu, APIntOps::abdu);
+  testBinaryOpExhaustive(KnownBits::abds, APIntOps::abds);
   testBinaryOpExhaustive(
       [](const KnownBits &Known1, const KnownBits &Known2) {
         return KnownBits::udiv(Known1, Known2);