[llvm] r311044 - [InstCombine] Make folding (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1 support splat vectors

Wed Aug 16 14:52:07 PDT 2017

Author: ctopper
Date: Wed Aug 16 14:52:07 2017
New Revision: 311044

URL: http://llvm.org/viewvc/llvm-project?rev=311044&view=rev
Log:
[InstCombine] Make folding (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1 support splat vectors

This also uses decomposeBitTestICmp to decode the compare.

Differential Revision: https://reviews.llvm.org/D36781

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/select.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=311044&r1=311043&r2=311044&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Wed Aug 16 14:52:07 2017
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -694,27 +695,31 @@ Instruction *InstCombiner::foldSelectIns
   // FIXME: Type and constness constraints could be lifted, but we have to
   //        watch code size carefully. We should consider xor instead of
   //        sub/add when we decide to do that.
-  if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
-    if (TrueVal->getType() == Ty) {
-      if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
-        ConstantInt *C1 = nullptr, *C2 = nullptr;
-        if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) {
-          C1 = dyn_cast<ConstantInt>(TrueVal);
-          C2 = dyn_cast<ConstantInt>(FalseVal);
-        } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) {
-          C1 = dyn_cast<ConstantInt>(FalseVal);
-          C2 = dyn_cast<ConstantInt>(TrueVal);
-        }
-        if (C1 && C2) {
+  if (CmpLHS->getType()->isIntOrIntVectorTy() &&
+      CmpLHS->getType() == TrueVal->getType()) {
+    const APInt *C1, *C2;
+    if (match(TrueVal, m_APInt(C1)) && match(FalseVal, m_APInt(C2))) {
+      ICmpInst::Predicate Pred = ICI->getPredicate();
+      Value *X;
+      APInt Mask;
+      if (decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask)) {
+        if (Mask.isSignMask()) {
+          assert(X == CmpLHS && "Expected to use the compare input directly");
+          assert(ICmpInst::isEquality(Pred) && "Expected equality predicate");
+
+          if (Pred == ICmpInst::ICMP_NE)
+            std::swap(C1, C2);
+
           // This shift results in either -1 or 0.
-          Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1);
+          Value *AShr = Builder.CreateAShr(X, Mask.getBitWidth() - 1);
 
           // Check if we can express the operation with a single or.
-          if (C2->isMinusOne())
-            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1));
+          if (C2->isAllOnesValue())
+            return replaceInstUsesWith(SI, Builder.CreateOr(AShr, *C1));
 
-          Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue());
-          return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1));
+          Value *And = Builder.CreateAnd(AShr, *C2 - *C1);
+          return replaceInstUsesWith(SI, Builder.CreateAdd(And,
+                                        ConstantInt::get(And->getType(), *C1)));
         }
       }
     }

Modified: llvm/trunk/test/Transforms/InstCombine/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/select.ll?rev=311044&r1=311043&r2=311044&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/select.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/select.ll Wed Aug 16 14:52:07 2017
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; PR1822
@@ -649,6 +650,33 @@ define i32 @test35(i32 %x) {
 ; CHECK: ret
 }
 
+define <2 x i32> @test35vec(<2 x i32> %x) {
+; CHECK-LABEL: @test35vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw <2 x i32> [[TMP2]], <i32 60, i32 60>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %cmp = icmp sge <2 x i32> %x, <i32 0, i32 0>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+  ret <2 x i32> %cond
+}
+
+; Make sure we can still perform this optimization with a truncate present
+define i32 @test35_with_trunc(i64 %x) {
+; CHECK-LABEL: @test35_with_trunc(
+; CHECK-NEXT:    [[X1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X1]], 31
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 40
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 60
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %x1 = trunc i64 %x to i32
+  %cmp = icmp sge i32 %x1, 0
+  %cond = select i1 %cmp, i32 60, i32 100
+  ret i32 %cond
+}
+
 define i32 @test36(i32 %x) {
   %cmp = icmp slt i32 %x, 0
   %cond = select i1 %cmp, i32 60, i32 100
@@ -660,6 +688,18 @@ define i32 @test36(i32 %x) {
 ; CHECK: ret
 }
 
+define <2 x i32> @test36vec(<2 x i32> %x) {
+; CHECK-LABEL: @test36vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 -40, i32 -40>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 100, i32 100>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %cmp = icmp slt <2 x i32> %x, <i32 0, i32 0>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+  ret <2 x i32> %cond
+}
+
 define i32 @test37(i32 %x) {
   %cmp = icmp sgt i32 %x, -1
   %cond = select i1 %cmp, i32 1, i32 -1
@@ -670,6 +710,17 @@ define i32 @test37(i32 %x) {
 ; CHECK: ret
 }
 
+define <2 x i32> @test37vec(<2 x i32> %x) {
+; CHECK-LABEL: @test37vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 -1, i32 -1>
+  ret <2 x i32> %cond
+}
+
 define i1 @test38(i1 %cond) {
   %zero = alloca i32
   %one = alloca i32