[Mlir-commits] [mlir] b1da82a - [mlir][arith] Fix overflow bug in arith::CeilDivSIOp::fold (#90947)

Wed May 8 13:06:07 PDT 2024

Author: Andrzej Warzyński
Date: 2024-05-08T21:06:03+01:00
New Revision: b1da82ae3dba0982b3a9668ca895ddf4164fb3d1

URL: https://github.com/llvm/llvm-project/commit/b1da82ae3dba0982b3a9668ca895ddf4164fb3d1
DIFF: https://github.com/llvm/llvm-project/commit/b1da82ae3dba0982b3a9668ca895ddf4164fb3d1.diff

LOG: [mlir][arith] Fix overflow bug in arith::CeilDivSIOp::fold (#90947)

The folder for arith::CeilDivSIOp should only be applied when it can be
guaranteed that no overflow would happen. The current implementation
works fine when both dividends are positive and the only arithmetic
operation is the division itself.

However, in cases where either the dividend or divisor is negative (or
both),
the division is split into multiple arith operations, e.g.: `- ( -a /
b)`. That's
additional 2 operations on top of the actual division that can overflow 
- the folder should check all 3 ops for overflow. 

The current logic doesn't do that - it effectively only checks the last
operation
(i.e. the division). It breaks when using e.g. MININT values (e.g. -128
for
8-bit integers) - negating such values overflows.

This PR makes sure that no folding happens if any of the intermediate
arithmetic operations overflows.

Fixes https://github.com/llvm/llvm-project/issues/89382

Added: 
    

Modified: 
    mlir/lib/Dialect/Arith/IR/ArithOps.cpp
    mlir/test/Transforms/constant-fold.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp
index 6f995b93bc3ec..a1568d0ebba3a 100644

--- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp
+++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp
@@ -683,6 +683,8 @@ OpFoldResult arith::CeilDivSIOp::fold(FoldAdaptor adaptor) {
     return getLhs();
 
   // Don't fold if it would overflow or if it requires a division by zero.
+  // TODO: This hook won't fold operations where a = MININT, because
+  // negating MININT overflows. This can be improved.
   bool overflowOrDiv0 = false;
   auto result = constFoldBinaryOp<IntegerAttr>(
       adaptor.getOperands(), [&](APInt a, const APInt &b) {
@@ -701,22 +703,36 @@ OpFoldResult arith::CeilDivSIOp::fold(FoldAdaptor adaptor) {
           // Both positive, return ceil(a, b).
           return signedCeilNonnegInputs(a, b, overflowOrDiv0);
         }
+
+        // No folding happens if any of the intermediate arithmetic operations
+        // overflows.
+        bool overflowNegA = false;
+        bool overflowNegB = false;
+        bool overflowDiv = false;
+        bool overflowNegRes = false;
         if (!aGtZero && !bGtZero) {
           // Both negative, return ceil(-a, -b).
-          APInt posA = zero.ssub_ov(a, overflowOrDiv0);
-          APInt posB = zero.ssub_ov(b, overflowOrDiv0);
-          return signedCeilNonnegInputs(posA, posB, overflowOrDiv0);
+          APInt posA = zero.ssub_ov(a, overflowNegA);
+          APInt posB = zero.ssub_ov(b, overflowNegB);
+          APInt res = signedCeilNonnegInputs(posA, posB, overflowDiv);
+          overflowOrDiv0 = (overflowNegA || overflowNegB || overflowDiv);
+          return res;
         }
         if (!aGtZero && bGtZero) {
           // A is negative, b is positive, return - ( -a / b).
-          APInt posA = zero.ssub_ov(a, overflowOrDiv0);
-          APInt div = posA.sdiv_ov(b, overflowOrDiv0);
-          return zero.ssub_ov(div, overflowOrDiv0);
+          APInt posA = zero.ssub_ov(a, overflowNegA);
+          APInt div = posA.sdiv_ov(b, overflowDiv);
+          APInt res = zero.ssub_ov(div, overflowNegRes);
+          overflowOrDiv0 = (overflowNegA || overflowDiv || overflowNegRes);
+          return res;
         }
         // A is positive, b is negative, return - (a / -b).
-        APInt posB = zero.ssub_ov(b, overflowOrDiv0);
-        APInt div = a.sdiv_ov(posB, overflowOrDiv0);
-        return zero.ssub_ov(div, overflowOrDiv0);
+        APInt posB = zero.ssub_ov(b, overflowNegB);
+        APInt div = a.sdiv_ov(posB, overflowDiv);
+        APInt res = zero.ssub_ov(div, overflowNegRes);
+
+        overflowOrDiv0 = (overflowNegB || overflowDiv || overflowNegRes);
+        return res;
       });
 
   return overflowOrDiv0 ? Attribute() : result;

diff  --git a/mlir/test/Transforms/constant-fold.mlir b/mlir/test/Transforms/constant-fold.mlir
index 253163f2af911..981757aed9b1d 100644
--- a/mlir/test/Transforms/constant-fold.mlir
+++ b/mlir/test/Transforms/constant-fold.mlir
@@ -478,6 +478,44 @@ func.func @simple_arith.ceildivsi() -> (i32, i32, i32, i32, i32) {
 
 // -----
 
+// CHECK-LABEL: func @simple_arith.ceildivsi_overflow
+func.func @simple_arith.ceildivsi_overflow() -> (i8, i16, i32) {
+  // The negative values below are MININTs for the corresponding bit-width. The
+  // folder will try to negate them (so that the division operates on two
+  // positive numbers), but that would cause overflow (negating MININT
+  // overflows). Hence folding should not happen and the original ceildivsi is
+  // preserved.
+
+  // TODO: The folder should be able to fold the following by avoiding
+  // intermediate operations that overflow.
+
+  // CHECK-DAG: %[[C_1:.*]] = arith.constant 7 : i8
+  // CHECK-DAG: %[[MIN_I8:.*]] = arith.constant -128 : i8
+  // CHECK-DAG: %[[C_2:.*]] = arith.constant 7 : i16
+  // CHECK-DAG: %[[MIN_I16:.*]] = arith.constant -32768 : i16
+  // CHECK-DAG: %[[C_3:.*]] = arith.constant 7 : i32
+  // CHECK-DAG: %[[MIN_I32:.*]] = arith.constant -2147483648 : i32
+
+  // CHECK-NEXT: %[[CEILDIV_1:.*]] = arith.ceildivsi %[[MIN_I8]], %[[C_1]]  : i8
+  %0 = arith.constant 7 : i8
+  %min_int_i8 = arith.constant -128 : i8
+  %2 = arith.ceildivsi %min_int_i8, %0 : i8
+
+  // CHECK-NEXT: %[[CEILDIV_2:.*]] = arith.ceildivsi %[[MIN_I16]], %[[C_2]]  : i16
+  %3 = arith.constant 7 : i16
+  %min_int_i16 = arith.constant -32768 : i16
+  %5 = arith.ceildivsi %min_int_i16, %3 : i16
+
+  // CHECK-NEXT: %[[CEILDIV_2:.*]] = arith.ceildivsi %[[MIN_I32]], %[[C_3]]  : i32
+  %6 = arith.constant 7 : i32
+  %min_int_i32 = arith.constant -2147483648 : i32
+  %8 = arith.ceildivsi %min_int_i32, %6 : i32
+
+  return %2, %5, %8 : i8, i16, i32
+}
+
+// -----
+
 // CHECK-LABEL: func @simple_arith.ceildivui
 func.func @simple_arith.ceildivui() -> (i32, i32, i32, i32, i32) {
   // CHECK-DAG: [[C0:%.+]] = arith.constant 0