[llvm] d9f064d - [InstCombine] visitTrunc - trunc(shl(X, C)) --> shl(trunc(X),trunc(C)) vector support

Thu Oct 8 14:11:04 PDT 2020

Author: Simon Pilgrim
Date: 2020-10-08T22:07:51+01:00
New Revision: d9f064dc0bd4ea4a29a20068835b37e973be907f

URL: https://github.com/llvm/llvm-project/commit/d9f064dc0bd4ea4a29a20068835b37e973be907f
DIFF: https://github.com/llvm/llvm-project/commit/d9f064dc0bd4ea4a29a20068835b37e973be907f.diff

LOG: [InstCombine] visitTrunc - trunc(shl(X, C)) --> shl(trunc(X),trunc(C)) vector support

Annoyingly, vectors aren't supported by shouldChangeType(), but there is precedent for always performing this kind of narrowing on vector types (e.g. narrowBinOp).

Differential Revision: https://reviews.llvm.org/D89067

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
    llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
    llvm/test/Transforms/InstCombine/trunc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 609d3e2ac7ee..e259b898351d 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -714,7 +714,6 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
   Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
   unsigned DestWidth = DestTy->getScalarSizeInBits();
   unsigned SrcWidth = SrcTy->getScalarSizeInBits();
-  ConstantInt *Cst;
 
   // Attempt to truncate the entire input expression tree to the destination
   // type.   Only do this if the dest type is a simple type, don't convert the
@@ -866,20 +865,19 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
   if (Instruction *I = shrinkInsertElt(Trunc, Builder))
     return I;
 
-  if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
-      shouldChangeType(SrcTy, DestTy)) {
+  if (Src->hasOneUse() &&
+      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
     // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
     // dest type is native and cst < dest size.
-    if (match(Src, m_Shl(m_Value(A), m_ConstantInt(Cst))) &&
+    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
         !match(A, m_Shr(m_Value(), m_Constant()))) {
       // Skip shifts of shift by constants. It undoes a combine in
       // FoldShiftByConstant and is the extend in reg pattern.
-      if (Cst->getValue().ult(DestWidth)) {
+      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
+      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
         Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
-
-        return BinaryOperator::Create(
-          Instruction::Shl, NewTrunc,
-          ConstantInt::get(DestTy, Cst->getValue().trunc(DestWidth)));
+        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
+                                      ConstantExpr::getTrunc(C, DestTy));
       }
     }
   }
@@ -896,6 +894,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
   //   --->
   //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
   Value *VecOp;
+  ConstantInt *Cst;
   if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
     auto *VecOpTy = cast<FixedVectorType>(VecOp->getType());
     unsigned VecNumElts = VecOpTy->getNumElements();

diff  --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
index fd5a3a338333..eb5b8eea08d4 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
@@ -362,12 +362,11 @@ define i1 @t10_constants(i32 %x, i64 %y) {
 
 define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @t11_constants_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 12, i32 12>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i64> [[Y:%.*]], <i64 14, i64 14>
-; CHECK-NEXT:    [[T1_TRUNC:%.*]] = trunc <2 x i64> [[T1]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T0]], [[T1_TRUNC]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 26, i32 26>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 12, i32 12>
   %t1 = shl <2 x i64> %y, <i64 14, i64 14>
@@ -378,12 +377,11 @@ define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) {
 }
 define <2 x i1> @t12_constants_vec_nonsplat(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @t12_constants_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 12, i32 14>
-; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i64> [[Y:%.*]], <i64 16, i64 14>
-; CHECK-NEXT:    [[T1_TRUNC:%.*]] = trunc <2 x i64> [[T1]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[T0]], [[T1_TRUNC]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], zeroinitializer
-; CHECK-NEXT:    ret <2 x i1> [[T3]]
+; CHECK-NEXT:    [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 28, i32 28>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP3]]
 ;
   %t0 = lshr <2 x i32> %x, <i32 12, i32 14>
   %t1 = shl <2 x i64> %y, <i64 16, i64 14>

diff  --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
index 59fef0a1b17f..223b2629cc63 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
@@ -27,8 +27,8 @@ define i16 @t0(i32 %x, i16 %y) {
 
 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 8>
-; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[X_TR]], <i16 8, i16 8>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 32>, %y
@@ -59,8 +59,8 @@ define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
 
 define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t3_vec_nonsplat_undef0(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
-; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc <3 x i32> [[X:%.*]] to <3 x i16>
+; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[X_TR]], <i16 8, i16 0, i16 8>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
@@ -74,8 +74,8 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t4_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
-; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc <3 x i32> [[X:%.*]] to <3 x i16>
+; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[X_TR]], <i16 8, i16 0, i16 8>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
@@ -89,8 +89,8 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 
 define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @t5_vec_nonsplat_undef1(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
-; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
+; CHECK-NEXT:    [[X_TR:%.*]] = trunc <3 x i32> [[X:%.*]] to <3 x i16>
+; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[X_TR]], <i16 8, i16 0, i16 8>
 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
 ;
   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y

diff  --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll
index c1c4749a304f..e59c9be0728a 100644
--- a/llvm/test/Transforms/InstCombine/trunc.ll
+++ b/llvm/test/Transforms/InstCombine/trunc.ll
@@ -704,11 +704,11 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) {
   ret i32 %trunc
 }
 
-; TODO: Should be able to handle vectors
+; Should be able to handle vectors
 define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[VAL:%.*]], <i64 16, i64 16>
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
+; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 16, i32 16>
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 16, i64 16>
@@ -718,8 +718,8 @@ define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) {
 
 define <2 x i32> @trunc_shl_nosplat_v2i32_v2i64(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_nosplat_v2i32_v2i64(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[VAL:%.*]], <i64 15, i64 16>
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
+; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 15, i32 16>
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 15, i64 16>
@@ -757,8 +757,8 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) {
 
 define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_uniform(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[VAL:%.*]], <i64 31, i64 31>
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
+; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 31>
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 31, i64 31>
@@ -768,8 +768,8 @@ define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) {
 
 define <2 x i32> @trunc_shl_v2i32_v2i64_undef(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_undef(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[VAL:%.*]], <i64 31, i64 undef>
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
+; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 undef>
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 31, i64 undef>
@@ -779,8 +779,8 @@ define <2 x i32> @trunc_shl_v2i32_v2i64_undef(<2 x i64> %val) {
 
 define <2 x i32> @trunc_shl_v2i32_v2i64_nonuniform(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_nonuniform(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[VAL:%.*]], <i64 31, i64 12>
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i64> [[SHL]] to <2 x i32>
+; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 12>
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 31, i64 12>
@@ -865,8 +865,8 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) {
 
 define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) {
 ; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[A:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[CONV:%.*]] = trunc <8 x i32> [[SHL]] to <8 x i16>
+; CHECK-NEXT:    [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
 ; CHECK-NEXT:    ret <8 x i16> [[CONV]]
 ;
   %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
@@ -894,8 +894,8 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) {
 
 define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) {
 ; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <8 x i32> [[A:%.*]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[CONV:%.*]] = trunc <8 x i32> [[SHL]] to <8 x i16>
+; CHECK-NEXT:    [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16>
+; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
 ; CHECK-NEXT:    ret <8 x i16> [[CONV]]
 ;
   %shl = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>