[PATCH] D88783: [InstCombine] matchRotate - fold or(shl(v,x),lshr(v,bw-x)) -> fshl(v,v,x) iff x < bw
Simon Pilgrim via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 7 04:37:04 PDT 2020
RKSimon updated this revision to Diff 296641.
RKSimon added a comment.
rebase - pass InstCombinerImpl into matchRotate so the shift-amount matcher can call computeKnownBits on the variable rotate amount
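For reference, this is the variable-rotate shape the new match recognizes; a minimal sketch in the spirit of the rotl_sub_mask test below, with illustrative names:

  define i64 @rotl_var(i64 %v, i64 %amt) {
    %x = and i64 %amt, 63          ; known bits prove %x < 64
    %shl = shl i64 %v, %x
    %sub = sub nuw nsw i64 64, %x  ; the two shift amounts sum to the bitwidth
    %lshr = lshr i64 %v, %sub
    %or = or i64 %lshr, %shl
    ret i64 %or
  }

Once computeKnownBits proves the shift amount is below the bitwidth, the whole or(shl, lshr) chain becomes a single call to @llvm.fshl.i64(i64 %v, i64 %v, i64 %x).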
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D88783/new/
https://reviews.llvm.org/D88783
Files:
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
llvm/test/Transforms/InstCombine/rotate.ll
Index: llvm/test/Transforms/InstCombine/rotate.ll
===================================================================
--- llvm/test/Transforms/InstCombine/rotate.ll
+++ llvm/test/Transforms/InstCombine/rotate.ll
@@ -679,12 +679,8 @@
define i64 @rotl_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotl_sub_mask(
-; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT: ret i64 [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%3 = and i64 %1, 63
%4 = shl i64 %0, %3
@@ -698,12 +694,8 @@
define i64 @rotr_sub_mask(i64 %0, i64 %1) {
; CHECK-LABEL: @rotr_sub_mask(
-; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT: ret i64 [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT: ret i64 [[TMP3]]
;
%3 = and i64 %1, 63
%4 = lshr i64 %0, %3
@@ -715,12 +707,8 @@
define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: @rotr_sub_mask_vector(
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP1:%.*]], <i64 63, i64 63>
-; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = shl <2 x i64> [[TMP0]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP6]], [[TMP4]]
-; CHECK-NEXT: ret <2 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
;
%3 = and <2 x i64> %1, <i64 63, i64 63>
%4 = lshr <2 x i64> %0, %3
Index: llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2072,7 +2072,7 @@
}
/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
-static Instruction *matchRotate(Instruction &Or) {
+static Instruction *matchRotate(BinaryOperator &Or, InstCombinerImpl &IC) {
// TODO: Can we reduce the code duplication between this and the related
// rotate matching code under visitSelect and visitTrunc?
unsigned Width = Or.getType()->getScalarSizeInBits();
@@ -2096,7 +2096,8 @@
// Match the shift amount operands for a rotate pattern. This always matches
// a subtraction on the R operand.
- auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+ auto matchShiftAmount = [&Or, &IC](Value *L, Value *R,
+ unsigned Width) -> Value * {
// Check for constant shift amounts that sum to the bitwidth.
// TODO: Support non-uniform shift amounts.
const APInt *LC, *RC;
@@ -2104,6 +2105,12 @@
if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
return ConstantInt::get(L->getType(), *LC);
+ // (shl ShVal, X) | (lshr ShVal, (Width - X)) iff X < Width
+ if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+ KnownBits KnownL = IC.computeKnownBits(L, /*Depth*/ 0, &Or);
+ return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+ }
+
// For non-constant cases we don't support non-pow2 shift masks.
// TODO: Is it worth matching urem as well?
if (!isPowerOf2_32(Width))
@@ -2593,7 +2600,7 @@
if (Instruction *BSwap = matchBSwap(I))
return BSwap;
- if (Instruction *Rotate = matchRotate(I))
+ if (Instruction *Rotate = matchRotate(I, *this))
return Rotate;
if (Instruction *Concat = matchOrConcat(I, Builder))
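One note on the KnownBits guard: if nothing constrains the shift amount (e.g. no and-mask as in the tests), KnownL.getMaxValue().ult(Width) fails, matchShiftAmount returns nullptr, and no funnel shift is formed. A hypothetical shape that must not fold:

  define i64 @not_a_rotl(i64 %v, i64 %x) {
    ; %x is unmasked and may be >= 64, so the fold is rejected
    %shl = shl i64 %v, %x
    %sub = sub i64 64, %x
    %lshr = lshr i64 %v, %sub
    %or = or i64 %lshr, %shl
    ret i64 %or
  }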