[llvm] ec54867 - [SCEV] Model `ashr exact x, C` as `(abs(x) EXACT/u (1<<C)) * signum(x)`

Sat Oct 17 11:23:18 PDT 2020

Author: Roman Lebedev
Date: 2020-10-17T21:22:24+03:00
New Revision: ec54867df5e7f20e12146e628af34f0384308bcb

URL: https://github.com/llvm/llvm-project/commit/ec54867df5e7f20e12146e628af34f0384308bcb
DIFF: https://github.com/llvm/llvm-project/commit/ec54867df5e7f20e12146e628af34f0384308bcb.diff

LOG: [SCEV] Model `ashr exact x, C` as `(abs(x) EXACT/u (1<<C)) * signum(x)`

It's not pretty, but probably better than modelling it
as an opaque SCEVUnknown, i guess.

It is relevant e.g. for the loop that was brought up in
https://bugs.llvm.org/show_bug.cgi?id=46786#c26
as an example of what we'd be able to better analyze
once SCEV handles `ptrtoint` (D89456).

But as it is evident, even if we deal with `ptrtoint` there,
we also fail to model such an `ashr`.
Also, modeling of mul-of-exact-shr/div could use improvement.

As per alive2:
https://alive2.llvm.org/ce/z/tnfZKd
```
define i8 @src(i8 %0) {
  %2 = ashr exact i8 %0, 4
  ret i8 %2
}

declare i8 @llvm.abs(i8, i1)
declare i8 @llvm.smin(i8, i8)
declare i8 @llvm.smax(i8, i8)

define i8 @tgt(i8 %x) {
  %abs_x = call i8 @llvm.abs(i8 %x, i1 false)
  %div = udiv exact i8 %abs_x, 16
  %t0 = call i8 @llvm.smax(i8 %x, i8 -1)
  %t1 = call i8 @llvm.smin(i8 %t0, i8 1)
  %r = mul nsw i8 %div, %t1
  ret i8 %r
}
```
Transformation seems to be correct!

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/ScalarEvolution.h
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/ashr.ll
    llvm/test/Analysis/ScalarEvolution/ptrtoint.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 65d092a59347..19e3607bc0dd 100644

--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -573,6 +573,7 @@ class ScalarEvolution {
   const SCEV *getGEPExpr(GEPOperator *GEP,
                          const SmallVectorImpl<const SCEV *> &IndexExprs);
   const SCEV *getAbsExpr(const SCEV *Op, bool IsNSW);
+  const SCEV *getSignumExpr(const SCEV *Op);
   const SCEV *getMinMaxExpr(unsigned Kind,
                             SmallVectorImpl<const SCEV *> &Operands);
   const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);

diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f3e152b00967..1845ca96c5b7 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3339,6 +3339,11 @@ const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
   return getSMaxExpr(Op, getNegativeSCEV(Op, Flags));
 }
 
+const SCEV *ScalarEvolution::getSignumExpr(const SCEV *Op) {
+  Type *Ty = Op->getType();
+  return getSMinExpr(getSMaxExpr(Op, getMinusOne(Ty)), getOne(Ty));
+}
+
 const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
                                            SmallVectorImpl<const SCEV *> &Ops) {
   assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
@@ -4273,6 +4278,7 @@ struct BinaryOp {
   Value *RHS;
   bool IsNSW = false;
   bool IsNUW = false;
+  bool IsExact = false;
 
   /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
   /// constant expression.
@@ -4285,11 +4291,14 @@ struct BinaryOp {
       IsNSW = OBO->hasNoSignedWrap();
       IsNUW = OBO->hasNoUnsignedWrap();
     }
+    if (auto *PEO = dyn_cast<PossiblyExactOperator>(Op))
+      IsExact = PEO->isExact();
   }
 
   explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
-                    bool IsNUW = false)
-      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
+                    bool IsNUW = false, bool IsExact = false)
+      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW),
+        IsExact(IsExact) {}
 };
 
 } // end anonymous namespace
@@ -6267,6 +6276,15 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
           }
         }
       }
+      if (BO->IsExact) {
+        // Given exact arithmetic in-bounds right-shift by a constant,
+        // we can lower it into:  (abs(x) EXACT/u (1<<C)) * signum(x)
+        const SCEV *X = getSCEV(BO->LHS);
+        const SCEV *AbsX = getAbsExpr(X, /*IsNSW=*/false);
+        APInt Mult = APInt::getOneBitSet(BitWidth, AShrAmt);
+        const SCEV *Div = getUDivExactExpr(AbsX, getConstant(Mult));
+        return getMulExpr(Div, getSignumExpr(X), SCEV::FlagNSW);
+      }
       break;
     }
     }

diff  --git a/llvm/test/Analysis/ScalarEvolution/ashr.ll b/llvm/test/Analysis/ScalarEvolution/ashr.ll
index 15e17468ce09..cf54c3e71641 100644
--- a/llvm/test/Analysis/ScalarEvolution/ashr.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ashr.ll
@@ -42,7 +42,7 @@ define i32 @t3(i32 %x, i32 %y) {
 ; ALL-LABEL: 't3'
 ; ALL-NEXT:  Classifying expressions for: @t3
 ; ALL-NEXT:    %i0 = ashr exact i32 %x, 4
-; ALL-NEXT:    --> %i0 U: full-set S: [-134217728,134217728)
+; ALL-NEXT:    --> ((((-1 * %x) smax %x) /u 16) * (1 smin (-1 smax %x)))<nsw> U: [-268435455,268435456) S: [-268435455,268435456)
 ; ALL-NEXT:  Determining loop execution counts for: @t3
 ;
   %i0 = ashr exact i32 %x, 4
@@ -65,7 +65,7 @@ define i32 @t5(i32 %x, i32 %y) {
 ; ALL-LABEL: 't5'
 ; ALL-NEXT:  Classifying expressions for: @t5
 ; ALL-NEXT:    %i0 = ashr exact i32 %x, 5
-; ALL-NEXT:    --> %i0 U: full-set S: [-67108864,67108864)
+; ALL-NEXT:    --> ((((-1 * %x) smax %x) /u 32) * (1 smin (-1 smax %x)))<nsw> U: [-134217727,134217728) S: [-134217727,134217728)
 ; ALL-NEXT:  Determining loop execution counts for: @t5
 ;
   %i0 = ashr exact i32 %x, 5

diff  --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
index 3e556345a516..1d32c5951e2a 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
@@ -325,6 +325,8 @@ bb5:
 ;   for (int* cur = start; cur != end; ++cur)
 ;     other[cur - start] += *cur;
 ; }
+;
+; FIXME: 4 * (%i10 EXACT/s 4) is just %i10
 define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
 ; X64-LABEL: 'pr46786_c26_int'
 ; X64-NEXT:  Classifying expressions for: @pr46786_c26_int
@@ -339,9 +341,9 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
 ; X64-NEXT:    %i10 = sub i64 %i9, %i4
 ; X64-NEXT:    --> ((-1 * %i4) + %i9) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X64-NEXT:    %i11 = ashr exact i64 %i10, 2
-; X64-NEXT:    --> %i11 U: full-set S: [-2305843009213693952,2305843009213693952) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X64-NEXT:    --> (((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9))))<nsw> U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X64-NEXT:    %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
-; X64-NEXT:    --> ((4 * %i11)<nsw> + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X64-NEXT:    --> ((4 * ((((-1 * %i4) + %i9) smax ((-1 * %i9) + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4) + %i9)))) + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X64-NEXT:    %i13 = load i32, i32* %i12, align 4
 ; X64-NEXT:    --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X64-NEXT:    %i14 = add nsw i32 %i13, %i8
@@ -368,9 +370,9 @@ define void @pr46786_c26_int(i32* %arg, i32* %arg1, i32* %arg2) {
 ; X32-NEXT:    %i10 = sub i64 %i9, %i4
 ; X32-NEXT:    --> ((-1 * %i4)<nsw> + %i9) U: [-4294967295,4294967296) S: [-8589934591,8589934592) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i11 = ashr exact i64 %i10, 2
-; X32-NEXT:    --> %i11 U: full-set S: [-2147483648,2147483648) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X32-NEXT:    --> (((((-1 * %i4)<nsw> + %i9) smax ((-1 * %i9)<nsw> + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4)<nsw> + %i9))))<nsw> U: [-4611686018427387903,4611686018427387904) S: [-4611686018427387903,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i12 = getelementptr inbounds i32, i32* %arg2, i64 %i11
-; X32-NEXT:    --> ((4 * (trunc i64 %i11 to i32))<nsw> + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
+; X32-NEXT:    --> ((4 * (trunc i64 (((((-1 * %i4)<nsw> + %i9) smax ((-1 * %i9)<nsw> + %i4)) /u 4) * (1 smin (-1 smax ((-1 * %i4)<nsw> + %i9))))<nsw> to i32))<nsw> + %arg2)<nsw> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i13 = load i32, i32* %i12, align 4
 ; X32-NEXT:    --> %i13 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant }
 ; X32-NEXT:    %i14 = add nsw i32 %i13, %i8