[llvm] r350672 - [InstCombine] canonicalize another raw IR rotate pattern to funnel shift

Tue Jan 8 14:39:56 PST 2019

Author: spatel
Date: Tue Jan  8 14:39:55 2019
New Revision: 350672

URL: http://llvm.org/viewvc/llvm-project?rev=350672&view=rev
Log:
[InstCombine] canonicalize another raw IR rotate pattern to funnel shift

This matches the equivalent of the DAG expansion,
so it should never result in worse performance than
the original code, even if the target doesn't have a
rotate instruction.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/rotate.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp?rev=350672&r1=350671&r2=350672&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Tue Jan  8 14:39:55 2019
@@ -1810,6 +1810,57 @@ Instruction *InstCombiner::matchBSwap(Bi
   return LastInst;
 }
 
+/// Transform UB-safe (masked-amount) 'or'-of-shifts rotates to a funnel shift.
+static Instruction *matchRotate(Instruction &Or) {
+  // TODO: Can we reduce the code duplication between this and the related
+  // rotate matching code under visitSelect and visitTrunc?
+  unsigned Width = Or.getType()->getScalarSizeInBits();
+  if (!isPowerOf2_32(Width))
+    return nullptr; // The 'Width - 1' mask below assumes a power-of-2 width.
+
+  // First, find an or'd pair of one-use, opposite-direction logical shifts of
+  // the same value (either order): or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+  Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+  Value *ShVal, *ShAmt0, *ShAmt1;
+  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
+      !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+    return nullptr;
+
+  auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+  auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+  if (ShiftOpcode0 == ShiftOpcode1)
+    return nullptr; // Two shifts in the same direction are not a rotate.
+
+  // Match the shift amount operands for a rotate pattern: returns X when L is
+  // (X & Mask) and R is ((-X) & Mask), else null. The negation is always on R.
+  auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+    // The shift amount may be masked with negation:
+    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+    Value *X;
+    unsigned Mask = Width - 1;
+    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+      return X;
+
+    return nullptr;
+  };
+
+  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+  bool SubIsOnLHS = false; // Set when the negated amount is Or0's (ShAmt0).
+  if (!ShAmt) {
+    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width); // Retry with roles swapped.
+    SubIsOnLHS = true;
+  }
+  if (!ShAmt)
+    return nullptr;
+
+  bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+                (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+  Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
+  return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+}
+
 /// If all elements of two constant vectors are 0/-1 and inverses, return true.
 static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
   unsigned NumElts = C1->getType()->getVectorNumElements();
@@ -2170,6 +2221,9 @@ Instruction *InstCombiner::visitOr(Binar
   if (Instruction *BSwap = matchBSwap(I))
     return BSwap;
 
+  if (Instruction *Rotate = matchRotate(I))
+    return Rotate;
+
   Value *X, *Y;
   const APInt *CV;
   if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=350672&r1=350671&r2=350672&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Tue Jan  8 14:39:55 2019
@@ -1557,8 +1557,7 @@ static Instruction *factorizeMinMaxTree(
 /// funnel shift intrinsic. Example:
 /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
 ///              --> call llvm.fshl.i32(a, a, b)
-static Instruction *foldSelectRotate(SelectInst &Sel,
-                                     InstCombiner::BuilderTy &Builder) {
+static Instruction *foldSelectRotate(SelectInst &Sel) {
   // The false value of the select must be a rotate of the true value.
   Value *Or0, *Or1;
   if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
@@ -2047,7 +2046,7 @@ Instruction *InstCombiner::visitSelectIn
   if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI))
     return Select;
 
-  if (Instruction *Rot = foldSelectRotate(SI, Builder))
+  if (Instruction *Rot = foldSelectRotate(SI))
     return Rot;
 
   return nullptr;

Modified: llvm/trunk/test/Transforms/InstCombine/rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/rotate.ll?rev=350672&r1=350671&r2=350672&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/rotate.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/rotate.ll Tue Jan  8 14:39:55 2019
@@ -213,12 +213,7 @@ define <3 x i42> @rotr_v3i42(<3 x i42> %
 
 define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @rotl_safe_i32(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub i32 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i32 [[Y]], 31
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i32 [[NEGY]], 31
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %negy = sub i32 0, %y
@@ -236,12 +231,9 @@ define i32 @rotl_safe_i32(i32 %x, i32 %y
 define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
 ; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i16 [[Y]], 15
 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
 ; CHECK-NEXT:    store i16 [[NEGYMASK]], i16* [[P:%.*]], align 2
-; CHECK-NEXT:    [[SHL:%.*]] = shl i16 [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i16 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %negy = sub i16 0, %y
@@ -259,12 +251,7 @@ define i16 @rotl_safe_i16_commute_extra_
 
 define i64 @rotr_safe_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @rotr_safe_i64(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub i64 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i64 [[Y]], 63
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i64 [[NEGY]], 63
-; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[X:%.*]], [[NEGYMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[X]], [[YMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %negy = sub i64 0, %y
@@ -305,12 +292,7 @@ define i8 @rotr_safe_i8_commute_extra_us
 
 define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @rotl_safe_v2i32(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub <2 x i32> zeroinitializer, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and <2 x i32> [[Y]], <i32 31, i32 31>
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and <2 x i32> [[NEGY]], <i32 31, i32 31>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %negy = sub <2 x i32> zeroinitializer, %y
@@ -327,12 +309,7 @@ define <2 x i32> @rotl_safe_v2i32(<2 x i
 
 define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @rotr_safe_v3i16(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub <3 x i16> zeroinitializer, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and <3 x i16> [[Y]], <i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and <3 x i16> [[NEGY]], <i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> [[X:%.*]], [[NEGYMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i16> [[X]], [[YMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or <3 x i16> [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %negy = sub <3 x i16> zeroinitializer, %y