[llvm] r335888 - [InstCombine] allow shl+mul combos with shuffle (select) fold (PR37806)

Thu Jun 28 10:48:04 PDT 2018

Author: spatel
Date: Thu Jun 28 10:48:04 2018
New Revision: 335888

URL: http://llvm.org/viewvc/llvm-project?rev=335888&view=rev
Log:
[InstCombine] allow shl+mul combos with shuffle (select) fold (PR37806)

This is an enhancement to D48401 that was discussed in:
https://bugs.llvm.org/show_bug.cgi?id=37806

We can convert a shift-left-by-constant into a multiply (we canonicalize IR in the other 
direction because that's generally better of course). This allows us to remove the shuffle 
as we do in the regular opcodes-are-the-same cases.

This requires a small hack to make sure we don't introduce any extra poison:
https://rise4fun.com/Alive/ZGv

Other examples of opcodes where this would work are add+sub and fadd+fsub, but we already 
canonicalize those subs into adds, so there's nothing to do for those cases AFAICT. There 
are planned enhancements for opcode transforms such as or -> add.

Note that there's a different fold needed if we've already managed to simplify away a binop 
as seen in the test based on PR37806, but we manage to get that one case here because this 
fold is positioned above the demanded elements fold currently.

Differential Revision: https://reviews.llvm.org/D48485

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp?rev=335888&r1=335887&r2=335888&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp Thu Jun 28 10:48:04 2018
@@ -1164,10 +1164,33 @@ static Instruction *foldSelectShuffles(S
   else
     return nullptr;
 
-  // TODO: There are potential folds where the opcodes do not match (mul+shl).
-  if (B0->getOpcode() != B1->getOpcode())
+  // We need matching binops to fold the lanes together.
+  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
+  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
+  bool DropNSW = false;
+  if (ConstantsAreOp1 && Opc0 != Opc1) {
+    // If we have multiply and shift-left-by-constant, convert the shift:
+    // shl X, C --> mul X, 1 << C
+    // TODO: We drop "nsw" if shift is converted into multiply because it may
+    // not be correct when the shift amount is BitWidth - 1. We could examine
+    // each vector element to determine if it is safe to keep that flag.
+    if (Opc0 == Instruction::Mul && Opc1 == Instruction::Shl) {
+      C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1);
+      Opc1 = Instruction::Mul;
+      DropNSW = true;
+    } else if (Opc0 == Instruction::Shl && Opc1 == Instruction::Mul) {
+      C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0);
+      Opc0 = Instruction::Mul;
+      DropNSW = true;
+    }
+  }
+
+  if (Opc0 != Opc1)
     return nullptr;
 
+  // The opcodes must be the same. Use a new name to make that clear.
+  BinaryOperator::BinaryOps BOpc = Opc0;
+
   // Remove a binop and the shuffle by rearranging the constant:
   // shuffle (op X, C0), (op X, C1), M --> op X, C'
   // shuffle (op C0, X), (op C1, X), M --> op C', X
@@ -1179,13 +1202,14 @@ static Instruction *foldSelectShuffles(S
   if (B0->isIntDivRem())
     NewC = getSafeVectorConstantForIntDivRem(NewC);
 
-  BinaryOperator::BinaryOps Opc = B0->getOpcode();
-  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(Opc, X, NewC) :
-                                         BinaryOperator::Create(Opc, NewC, X);
+  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, X, NewC) :
+                                         BinaryOperator::Create(BOpc, NewC, X);
 
   // Flags are intersected from the 2 source binops.
   NewBO->copyIRFlags(B0);
   NewBO->andIRFlags(B1);
+  if (DropNSW)
+    NewBO->setHasNoSignedWrap(false);
   return NewBO;
 }
 

Modified: llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll?rev=335888&r1=335887&r2=335888&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll Thu Jun 28 10:48:04 2018
@@ -502,14 +502,11 @@ define <4 x double> @fdiv_2_vars(<4 x do
   ret <4 x double> %t3
 }
 
-; FIXME:
 ; Shift-left with constant shift amount can be converted to mul to enable the fold.
 
 define <4 x i32> @mul_shl(<4 x i32> %v0) {
 ; CHECK-LABEL: @mul_shl(
-; CHECK-NEXT:    [[T1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 undef, i32 undef, i32 3, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = shl nuw <4 x i32> [[V0]], <i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -518,11 +515,11 @@ define <4 x i32> @mul_shl(<4 x i32> %v0)
   ret <4 x i32> %t3
 }
 
+; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
+
 define <4 x i32> @shl_mul(<4 x i32> %v0) {
 ; CHECK-LABEL: @shl_mul(
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT:    [[T2:%.*]] = mul nsw <4 x i32> [[V0]], <i32 5, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -536,8 +533,7 @@ define <4 x i32> @shl_mul(<4 x i32> %v0)
 
 define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
 ; CHECK-LABEL: @mul_is_nop_shl(
-; CHECK-NEXT:    [[T2:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
   %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -546,6 +542,8 @@ define <4 x i32> @mul_is_nop_shl(<4 x i3
   ret <4 x i32> %t3
 }
 
+; Negative test: shift amount (operand 1) must be constant.
+
 define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
 ; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
 ; CHECK-NEXT:    [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]