[llvm] r339621 - [InstCombine] Re-land: Optimize redundant 'signed truncation check pattern'.

Mon Aug 13 14:54:37 PDT 2018

Author: lebedevri
Date: Mon Aug 13 14:54:37 2018
New Revision: 339621

URL: http://llvm.org/viewvc/llvm-project?rev=339621&view=rev
Log:
[InstCombine] Re-land: Optimize redundant 'signed truncation check pattern'.

Summary:
This comes with `Implicit Conversion Sanitizer - integer sign change` (D50250):
```
signed char test(unsigned int x) { return x; }
```
`clang++ -fsanitize=implicit-conversion -S -emit-llvm -o - /tmp/test.cpp -O3`
* Old: {F6904292}
* With this patch: {F6904294}

General pattern:
  X & Y

Where `Y` is checking that all the high bits (covered by a mask `4294967168`)
are uniform, i.e.  `%arg & 4294967168`  can be either  `4294967168`  or  `0`
Pattern can be one of:
  %t = add        i32 %arg,    128
  %r = icmp   ult i32 %t,      256
Or
  %t0 = shl       i32 %arg,    24
  %t1 = ashr      i32 %t0,     24
  %r  = icmp  eq  i32 %t1,     %arg
Or
  %t0 = trunc     i32 %arg  to i8
  %t1 = sext      i8  %t0   to i32
  %r  = icmp  eq  i32 %t1,     %arg
This pattern is a signed truncation check.

And `X` is checking that some bit in that same mask is zero.
I.e. can be one of:
  %r = icmp sgt i32   %arg,    -1
Or
  %t = and      i32   %arg,    2147483648
  %r = icmp eq  i32   %t,      0

Since we are checking that all the bits in that mask are the same,
and a particular bit is zero, what we are really checking is that all the
masked bits are zero.
So this should be transformed to:
  %r = icmp ult i32 %arg, 128

The transform itself ended up being rather horrible, even though i omitted some cases.
Surely there is some infrastructure that can help clean this up that i missed?

https://rise4fun.com/Alive/3Ou

The initial commit (rL339610)
was reverted, since the first assert was being triggered.
The @positive_with_extra_and test now has coverage for that case.

Reviewers: spatel, craig.topper

Reviewed By: spatel

Subscribers: RKSimon, erichkeane, vsk, llvm-commits

Differential Revision: https://reviews.llvm.org/D50465

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/trunk/test/Transforms/InstCombine/signed-truncation-check.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=339621&r1=339620&r2=339621&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Mon Aug 13 14:54:37 2018
@@ -898,6 +898,130 @@ Value *InstCombiner::foldAndOrOfICmpsOfA
   return nullptr;
 }
 
+/// General pattern:
+///   X & Y
+///
+/// Where Y is checking that all the high bits (covered by a mask 4294967168)
+/// are uniform, i.e.  %arg & 4294967168  can be either  4294967168  or  0
+/// Pattern can be one of:
+///   %t = add        i32 %arg,    128
+///   %r = icmp   ult i32 %t,      256
+/// Or
+///   %t0 = shl       i32 %arg,    24
+///   %t1 = ashr      i32 %t0,     24
+///   %r  = icmp  eq  i32 %t1,     %arg
+/// Or
+///   %t0 = trunc     i32 %arg  to i8
+///   %t1 = sext      i8  %t0   to i32
+///   %r  = icmp  eq  i32 %t1,     %arg
+/// This pattern is a signed truncation check.
+///
+/// And X is checking that some bit in that same mask is zero.
+/// I.e. can be one of:
+///   %r = icmp sgt i32   %arg,    -1
+/// Or
+///   %t = and      i32   %arg,    2147483648
+///   %r = icmp eq  i32   %t,      0
+///
+/// Since we are checking that all the bits in that mask are the same,
+/// and a particular bit is zero, what we are really checking is that all the
+/// masked bits are zero.
+/// So this should be transformed to:
+///   %r = icmp ult i32 %arg, 128
+static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
+                                        Instruction &CxtI,
+                                        InstCombiner::BuilderTy &Builder) {
+  assert(CxtI.getOpcode() == Instruction::And);
+
+  // Match  icmp ult (add %arg, C01), C1   (C1 == C01 << 1; powers of two)
+  auto tryToMatchSignedTruncationCheck = [](ICmpInst *ICmp, Value *&X,
+                                            APInt &SignBitMask) -> bool {
+    CmpInst::Predicate Pred;
+    const APInt *I01, *I1; // powers of two; I1 == I01 << 1
+    if (!(match(ICmp,
+                m_ICmp(Pred, m_Add(m_Value(X), m_Power2(I01)), m_Power2(I1))) &&
+          Pred == ICmpInst::ICMP_ULT && I1->ugt(*I01) && I01->shl(1) == *I1))
+      return false;
+    // Which bit is the new sign bit as per the 'signed truncation' pattern?
+    SignBitMask = *I01;
+    return true;
+  };
+
+  // One icmp needs to be 'signed truncation check'.
+  // We need to match this first, else we will mismatch commutative cases.
+  Value *X1;
+  APInt HighestBit;
+  ICmpInst *OtherICmp;
+  if (tryToMatchSignedTruncationCheck(ICmp1, X1, HighestBit))
+    OtherICmp = ICmp0;
+  else if (tryToMatchSignedTruncationCheck(ICmp0, X1, HighestBit))
+    OtherICmp = ICmp1;
+  else
+    return nullptr;
+
+  assert(HighestBit.isPowerOf2() && "expected to be power of two (non-zero)");
+
+  // Try to match/decompose into:  icmp eq (X & Mask), 0
+  auto tryToDecompose = [](ICmpInst *ICmp, Value *&X,
+                           APInt &UnsetBitsMask) -> bool {
+    CmpInst::Predicate Pred = ICmp->getPredicate();
+    // Can it be decomposed into  icmp eq (X & Mask), 0  ?
+    if (llvm::decomposeBitTestICmp(ICmp->getOperand(0), ICmp->getOperand(1),
+                                   Pred, X, UnsetBitsMask,
+                                   /*LookThruTrunc=*/false) &&
+        Pred == ICmpInst::ICMP_EQ)
+      return true;
+    // Is it  icmp eq (X & Mask), 0  already?
+    const APInt *Mask;
+    if (match(ICmp, m_ICmp(Pred, m_And(m_Value(X), m_APInt(Mask)), m_Zero())) &&
+        Pred == ICmpInst::ICMP_EQ) {
+      UnsetBitsMask = *Mask;
+      return true;
+    }
+    return false;
+  };
+
+  // And the other icmp needs to be decomposable into a bit test.
+  Value *X0;
+  APInt UnsetBitsMask;
+  if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
+    return nullptr;
+
+  assert(!UnsetBitsMask.isNullValue() && "empty mask makes no sense.");
+
+  // Are they working on the same value?
+  Value *X;
+  if (X1 == X0) {
+    // Ok as is.
+    X = X1;
+  } else if (match(X0, m_Trunc(m_Specific(X1)))) {
+    UnsetBitsMask = UnsetBitsMask.zext(X1->getType()->getScalarSizeInBits());
+    X = X1;
+  } else
+    return nullptr;
+
+  // So which bits should be uniform as per the 'signed truncation check'?
+  // (all the bits starting with (i.e. including) HighestBit)
+  APInt SignBitsMask = ~(HighestBit - 1U);
+
+  // UnsetBitsMask must have some common bits with SignBitsMask,
+  if (!UnsetBitsMask.intersects(SignBitsMask))
+    return nullptr;
+
+  // Does UnsetBitsMask contain any bits outside of SignBitsMask?
+  if (!UnsetBitsMask.isSubsetOf(SignBitsMask)) {
+    APInt OtherHighestBit = (~UnsetBitsMask) + 1U;
+    if (!OtherHighestBit.isPowerOf2())
+      return nullptr;
+    HighestBit = APIntOps::umin(HighestBit, OtherHighestBit);
+  }
+  // Else, if it does not, then all is ok as-is.
+
+  // %r = icmp ult %X, SignBit
+  return Builder.CreateICmpULT(X, ConstantInt::get(X->getType(), HighestBit),
+                               CxtI.getName() + ".simplified");
+}
+
 /// Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                     Instruction &CxtI) {
@@ -937,6 +1061,9 @@ Value *InstCombiner::foldAndOfICmps(ICmp
   if (Value *V = foldAndOrOfEqualityCmpsWithConstants(LHS, RHS, true, Builder))
     return V;
 
+  if (Value *V = foldSignedTruncationCheck(LHS, RHS, CxtI, Builder))
+    return V;
+
   // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
   Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
   ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS->getOperand(1));

Modified: llvm/trunk/test/Transforms/InstCombine/signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/signed-truncation-check.ll?rev=339621&r1=339620&r2=339621&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/signed-truncation-check.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/signed-truncation-check.ll Mon Aug 13 14:54:37 2018
@@ -38,11 +38,8 @@
 
 define i1 @positive_with_signbit(i32 %arg) {
 ; CHECK-LABEL: @positive_with_signbit(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    ret i1 [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt i32 %arg, -1
   %t2 = add i32 %arg, 128
@@ -53,12 +50,8 @@ define i1 @positive_with_signbit(i32 %ar
 
 define i1 @positive_with_mask(i32 %arg) {
 ; CHECK-LABEL: @positive_with_mask(
-; CHECK-NEXT:    [[T1:%.*]] = and i32 [[ARG:%.*]], 1107296256
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[T1]], 0
-; CHECK-NEXT:    [[T3:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T4:%.*]] = icmp ult i32 [[T3]], 256
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    ret i1 [[T5_SIMPLIFIED]]
 ;
   %t1 = and i32 %arg, 1107296256
   %t2 = icmp eq i32 %t1, 0
@@ -70,11 +63,8 @@ define i1 @positive_with_mask(i32 %arg)
 
 define i1 @positive_with_icmp(i32 %arg) {
 ; CHECK-LABEL: @positive_with_icmp(
-; CHECK-NEXT:    [[T1:%.*]] = icmp ult i32 [[ARG:%.*]], 512
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    ret i1 [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp ult i32 %arg, 512
   %t2 = add i32 %arg, 128
@@ -86,11 +76,8 @@ define i1 @positive_with_icmp(i32 %arg)
 ; Still the same
 define i1 @positive_with_aggressive_icmp(i32 %arg) {
 ; CHECK-LABEL: @positive_with_aggressive_icmp(
-; CHECK-NEXT:    [[T1:%.*]] = icmp ult i32 [[ARG:%.*]], 128
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 256
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 512
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    ret i1 [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp ult i32 %arg, 128
   %t2 = add i32 %arg, 256
@@ -105,12 +92,9 @@ define i1 @positive_with_aggressive_icmp
 ; operands of the and.
 define i1 @positive_with_extra_and(i32 %arg, i1 %z) {
 ; CHECK-LABEL: @positive_with_extra_and(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt i32 [[ARG:%.*]], -1
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[Z:%.*]]
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T3]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[T5_SIMPLIFIED]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %t1 = icmp sgt i32 %arg, -1
   %t2 = add i32 %arg, 128
@@ -126,11 +110,8 @@ define i1 @positive_with_extra_and(i32 %
 
 define <2 x i1> @positive_vec_splat(<2 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_splat(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[ARG]], <i32 128, i32 128>
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult <2 x i32> [[T2]], <i32 256, i32 256>
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]]
-; CHECK-NEXT:    ret <2 x i1> [[T4]]
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    ret <2 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <2 x i32> %arg, <i32 -1, i32 -1>
   %t2 = add <2 x i32> %arg, <i32 128, i32 128>
@@ -268,11 +249,8 @@ declare i32 @gen32()
 define i1 @commutative() {
 ; CHECK-LABEL: @commutative(
 ; CHECK-NEXT:    [[ARG:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt i32 [[ARG]], -1
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T3]], [[T1]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT:    ret i1 [[T4_SIMPLIFIED]]
 ;
   %arg = call i32 @gen32()
   %t1 = icmp sgt i32 %arg, -1
@@ -285,11 +263,8 @@ define i1 @commutative() {
 define i1 @commutative_with_icmp() {
 ; CHECK-LABEL: @commutative_with_icmp(
 ; CHECK-NEXT:    [[ARG:%.*]] = call i32 @gen32()
-; CHECK-NEXT:    [[T1:%.*]] = icmp ult i32 [[ARG]], 512
-; CHECK-NEXT:    [[T2:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T3]], [[T1]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT:    ret i1 [[T4_SIMPLIFIED]]
 ;
   %arg = call i32 @gen32()
   %t1 = icmp ult i32 %arg, 512
@@ -305,12 +280,8 @@ define i1 @commutative_with_icmp() {
 
 define i1 @positive_trunc_signbit(i32 %arg) {
 ; CHECK-LABEL: @positive_trunc_signbit(
-; CHECK-NEXT:    [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i8
-; CHECK-NEXT:    [[T2:%.*]] = icmp sgt i8 [[T1]], -1
-; CHECK-NEXT:    [[T3:%.*]] = add i32 [[ARG]], 128
-; CHECK-NEXT:    [[T4:%.*]] = icmp ult i32 [[T3]], 256
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG:%.*]], 128
+; CHECK-NEXT:    ret i1 [[T5_SIMPLIFIED]]
 ;
   %t1 = trunc i32 %arg to i8
   %t2 = icmp sgt i8 %t1, -1
@@ -323,11 +294,8 @@ define i1 @positive_trunc_signbit(i32 %a
 define i1 @positive_trunc_base(i32 %arg) {
 ; CHECK-LABEL: @positive_trunc_base(
 ; CHECK-NEXT:    [[T1:%.*]] = trunc i32 [[ARG:%.*]] to i16
-; CHECK-NEXT:    [[T2:%.*]] = icmp sgt i16 [[T1]], -1
-; CHECK-NEXT:    [[T3:%.*]] = add i16 [[T1]], 128
-; CHECK-NEXT:    [[T4:%.*]] = icmp ult i16 [[T3]], 256
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[T5_SIMPLIFIED:%.*]] = icmp ult i16 [[T1]], 128
+; CHECK-NEXT:    ret i1 [[T5_SIMPLIFIED]]
 ;
   %t1 = trunc i32 %arg to i16
   %t2 = icmp sgt i16 %t1, -1
@@ -376,8 +344,8 @@ define i1 @oneuse_with_signbit(i32 %arg)
 ; CHECK-NEXT:    call void @use32(i32 [[T2]])
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 256
 ; CHECK-NEXT:    call void @use1(i1 [[T3]])
-; CHECK-NEXT:    [[T4:%.*]] = and i1 [[T1]], [[T3]]
-; CHECK-NEXT:    ret i1 [[T4]]
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT:    ret i1 [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt i32 %arg, -1
   call void @use1(i1 %t1)
@@ -399,8 +367,8 @@ define i1 @oneuse_with_mask(i32 %arg) {
 ; CHECK-NEXT:    call void @use32(i32 [[T3]])
 ; CHECK-NEXT:    [[T4:%.*]] = icmp ult i32 [[T3]], 256
 ; CHECK-NEXT:    call void @use1(i1 [[T4]])
-; CHECK-NEXT:    [[T5:%.*]] = and i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[T5]]
+; CHECK-NEXT:    [[T5_SIMPLIFIED:%.*]] = icmp ult i32 [[ARG]], 128
+; CHECK-NEXT:    ret i1 [[T5_SIMPLIFIED]]
 ;
   %t1 = and i32 %arg, 603979776 ; some bit within the target 4294967168 mask.
   call void @use32(i32 %t1)