[llvm] e5ee0b0 - [InstCombine] try to determine "exact" for sdiv

Sun Oct 16 08:14:05 PDT 2022

Author: Sanjay Patel
Date: 2022-10-16T10:59:56-04:00
New Revision: e5ee0b06d694fe7749b56706f1bf67e22eaef628

URL: https://github.com/llvm/llvm-project/commit/e5ee0b06d694fe7749b56706f1bf67e22eaef628
DIFF: https://github.com/llvm/llvm-project/commit/e5ee0b06d694fe7749b56706f1bf67e22eaef628.diff

LOG: [InstCombine] try to determine "exact" for sdiv

If the divisor is a power-of-2 or negative-power-of-2 and the dividend
is known to have >= trailing zeros than the divisor, the division is exact:
https://alive2.llvm.org/ce/z/UGBksM (general proof)
https://alive2.llvm.org/ce/z/D4yPS- (examples based on regression tests)

This isn't the most direct optimization (we could create ashr in these
examples instead of relying on existing folds for exact divides), but
it's possible that there's a more general constraint than just a pow2
divisor, so this might be extended in the future.

This should solve issue #58348.

Differential Revision: https://reviews.llvm.org/D135970

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
    llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
    llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index b98e6736ccf9..c2c35c643adc 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1332,7 +1332,15 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
                               ConstantInt::getAllOnesValue(Ty));
   }
 
-  if (isKnownNonNegative(Op0, DL, 0, &AC, &I, &DT)) {
+  KnownBits KnownDividend = computeKnownBits(Op0, 0, &I);
+  if (!I.isExact() &&
+      (match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) &&
+      KnownDividend.countMinTrailingZeros() >= Op1C->countTrailingZeros()) {
+    I.setIsExact();
+    return &I;
+  }
+
+  if (KnownDividend.isNonNegative()) {
     // If both operands are unsigned, turn this into a udiv.
     if (isKnownNonNegative(Op1, DL, 0, &AC, &I, &DT)) {
       auto *BO = BinaryOperator::CreateUDiv(Op0, Op1, I.getName());

diff  --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
index de74b3129602..81f22bb3c664 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
@@ -64,8 +64,9 @@ define <2 x i8> @n4_vec_undef(<2 x i8> %x) {
 
 define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], -4
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[D_NEG:%.*]] = and i8 [[A]], -8
+; CHECK-NEXT:    [[D:%.*]] = sub nsw i8 0, [[D_NEG]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %a = and i8 %x, -32
@@ -75,8 +76,8 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 
 define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_limit(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], -32
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[D:%.*]] = sub nsw i8 0, [[A]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %a = and i8 %x, -32
@@ -84,6 +85,8 @@ define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
   ret i8 %d
 }
 
+; negative test - not enough low zeros in dividend
+
 define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @not_prove_exact_with_high_mask(
 ; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -32
@@ -98,7 +101,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[A]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], <i8 4, i8 4>
+; CHECK-NEXT:    [[D:%.*]] = sub nsw <2 x i8> zeroinitializer, [[D_NEG]]
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
   %a = shl <2 x i8> %x, <i8 5, i8 5>
@@ -106,6 +110,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y)
   ret <2 x i8> %d
 }
 
+; TODO: Needs knownbits to handle arbitrary vector constants.
+
 define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>

diff  --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
index 0cdc52b6e83d..36af8685f9ae 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
@@ -110,8 +110,8 @@ define i8 @shl1_nsw_not_exact(i8 %x, i8 %y) {
 
 define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], 4
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[D:%.*]] = and i8 [[A]], -2
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %a = and i8 %x, -8
@@ -121,15 +121,16 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) {
 
 define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_limit(
-; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
-; CHECK-NEXT:    [[D:%.*]] = sdiv i8 [[A]], 8
-; CHECK-NEXT:    ret i8 [[D]]
+; CHECK-NEXT:    [[A:%.*]] = ashr i8 [[X:%.*]], 3
+; CHECK-NEXT:    ret i8 [[A]]
 ;
   %a = and i8 %x, -8
   %d = sdiv i8 %a, 8
   ret i8 %d
 }
 
+; negative test - not enough low zeros in dividend
+
 define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 ; CHECK-LABEL: @not_prove_exact_with_high_mask(
 ; CHECK-NEXT:    [[A:%.*]] = and i8 [[X:%.*]], -8
@@ -144,7 +145,7 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[A]], <i8 8, i8 8>
+; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i8> [[A]], <i8 3, i8 3>
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
   %a = shl <2 x i8> %x, <i8 3, i8 3>
@@ -152,6 +153,8 @@ define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y)
   ret <2 x i8> %d
 }
 
+; TODO: Needs knownbits to handle arbitrary vector constants.
+
 define <2 x i8> @prove_exact_with_high_mask_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_vec(
 ; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>