[llvm] 0369714 - [InstCombine] reduce vector casting before icmp

Fri Aug 6 14:15:56 PDT 2021

Author: Sanjay Patel
Date: 2021-08-06T17:09:38-04:00
New Revision: 0369714b31682dc36e55f1a2b3a36c25fb1b6f98

URL: https://github.com/llvm/llvm-project/commit/0369714b31682dc36e55f1a2b3a36c25fb1b6f98
DIFF: https://github.com/llvm/llvm-project/commit/0369714b31682dc36e55f1a2b3a36c25fb1b6f98.diff

LOG: [InstCombine] reduce vector casting before icmp

These patterns could probably be generalized further (see the test comments),
but this should handle the cases motivated by:
https://llvm.org/PR51315
https://llvm.org/PR51259

The backend may want to transform this differently, but at least for
the x86 examples that I looked at, there does not appear to be
any significant performance difference either way.

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/icmp-vec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index adc6726d1594..8514952e9241 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2936,6 +2936,19 @@ Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
     return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
   }
 
+  // If this is checking if all elements of an extended vector are clear or not,
+  // compare in a narrow type to eliminate the extend:
+  // icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
+  Value *X;
+  if (Cmp.isEquality() && C->isNullValue() && Bitcast->hasOneUse() &&
+      match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) {
+    if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) {
+      Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits());
+      Value *NewCast = Builder.CreateBitCast(X, NewType);
+      return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType));
+    }
+  }
+
   // Folding: icmp <pred> iN X, C
   //  where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
   //    and C is a splat of a K-bit pattern

diff  --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll
index 0b3dc163ba10..d7835110f542 100644
--- a/llvm/test/Transforms/InstCombine/icmp-vec.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll
@@ -585,9 +585,8 @@ define i1 @eq_cast_eq-1_use2(<2 x i4> %x, <2 x i4> %y, i2* %p) {
 
 define i1 @ne_cast_sext(<3 x i1> %b) {
 ; CHECK-LABEL: @ne_cast_sext(
-; CHECK-NEXT:    [[E:%.*]] = sext <3 x i1> [[B:%.*]] to <3 x i8>
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <3 x i8> [[E]] to i24
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i24 [[BC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x i1> [[B:%.*]] to i3
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i3 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %e = sext <3 x i1> %b to <3 x i8>
@@ -598,9 +597,8 @@ define i1 @ne_cast_sext(<3 x i1> %b) {
 
 define i1 @eq_cast_sext(<8 x i3> %b) {
 ; CHECK-LABEL: @eq_cast_sext(
-; CHECK-NEXT:    [[E:%.*]] = sext <8 x i3> [[B:%.*]] to <8 x i8>
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <8 x i8> [[E]] to i64
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i64 [[BC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i3> [[B:%.*]] to i24
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i24 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %e = sext <8 x i3> %b to <8 x i8>
@@ -611,9 +609,8 @@ define i1 @eq_cast_sext(<8 x i3> %b) {
 
 define i1 @ne_cast_zext(<4 x i1> %b) {
 ; CHECK-LABEL: @ne_cast_zext(
-; CHECK-NEXT:    [[E:%.*]] = zext <4 x i1> [[B:%.*]] to <4 x i8>
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i8> [[E]] to i32
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[BC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[B:%.*]] to i4
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i4 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %e = zext <4 x i1> %b to <4 x i8>
@@ -624,9 +621,8 @@ define i1 @ne_cast_zext(<4 x i1> %b) {
 
 define i1 @eq_cast_zext(<5 x i3> %b) {
 ; CHECK-LABEL: @eq_cast_zext(
-; CHECK-NEXT:    [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i35 [[BC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <5 x i3> [[B:%.*]] to i15
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i15 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %e = zext <5 x i3> %b to <5 x i7>
@@ -635,6 +631,8 @@ define i1 @eq_cast_zext(<5 x i3> %b) {
   ret i1 %r
 }
 
+; negative test - valid for eq/ne only
+
 define i1 @sgt_cast_zext(<5 x i3> %b) {
 ; CHECK-LABEL: @sgt_cast_zext(
 ; CHECK-NEXT:    [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
@@ -648,6 +646,9 @@ define i1 @sgt_cast_zext(<5 x i3> %b) {
   ret i1 %r
 }
 
+; negative test - not valid with non-zero constants
+; TODO: We could handle some non-zero constants by checking for bit-loss after casts.
+
 define i1 @eq7_cast_sext(<5 x i3> %b) {
 ; CHECK-LABEL: @eq7_cast_sext(
 ; CHECK-NEXT:    [[E:%.*]] = sext <5 x i3> [[B:%.*]] to <5 x i7>
@@ -661,12 +662,14 @@ define i1 @eq7_cast_sext(<5 x i3> %b) {
   ret i1 %r
 }
 
+; extra use of extend is ok
+
 define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
 ; CHECK-LABEL: @eq_cast_zext_use1(
 ; CHECK-NEXT:    [[E:%.*]] = zext <5 x i3> [[B:%.*]] to <5 x i7>
 ; CHECK-NEXT:    store <5 x i7> [[E]], <5 x i7>* [[P:%.*]], align 8
-; CHECK-NEXT:    [[BC:%.*]] = bitcast <5 x i7> [[E]] to i35
-; CHECK-NEXT:    [[R:%.*]] = icmp eq i35 [[BC]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <5 x i3> [[B]] to i15
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i15 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %e = zext <5 x i3> %b to <5 x i7>
@@ -676,6 +679,8 @@ define i1 @eq_cast_zext_use1(<5 x i3> %b, <5 x i7>* %p) {
   ret i1 %r
 }
 
+; negative test - don't create an extra cast
+
 declare void @use35(i35)
 
 define i1 @eq_cast_zext_use2(<5 x i3> %b) {