[PATCH] D88783: [InstCombine] matchRotate - fold or(shl(v,x),lshr(v,bw-x)) -> fshl(v,v,x) iff x < bw

Simon Pilgrim via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 3 09:24:31 PDT 2020


RKSimon created this revision.
RKSimon added reviewers: spatel, lebedev.ri, nikic.
Herald added a subscriber: hiraditya.
Herald added a project: LLVM.
RKSimon requested review of this revision.

If value tracking can confirm that the shift amount is less than the type bitwidth, then we can fold the general or(shl(v,x),lshr(v,bw-x)) pattern to a rotate (funnel shift) intrinsic.

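As an illustration, a minimal before/after sketch of the i64 case (the function name @rotl_example is hypothetical; it mirrors the rotl_sub_mask test updated below):

  define i64 @rotl_example(i64 %v, i64 %amt) {
    %masked = and i64 %amt, 63          ; proves the shift amount is < 64
    %shl = shl i64 %v, %masked
    %sub = sub nuw nsw i64 64, %masked
    %shr = lshr i64 %v, %sub
    %or = or i64 %shr, %shl
    ret i64 %or
  }

  ; -> the whole sequence becomes (see the updated rotl_sub_mask test below):
  ;    %or = call i64 @llvm.fshl.i64(i64 %v, i64 %v, i64 %amt)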

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88783

Files:
  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
  llvm/lib/Transforms/InstCombine/InstCombineInternal.h
  llvm/test/Transforms/InstCombine/rotate.ll


Index: llvm/test/Transforms/InstCombine/rotate.ll
===================================================================
--- llvm/test/Transforms/InstCombine/rotate.ll
+++ llvm/test/Transforms/InstCombine/rotate.ll
@@ -679,12 +679,8 @@
 
 define i64 @rotl_sub_mask(i64 %0, i64 %1) {
 ; CHECK-LABEL: @rotl_sub_mask(
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %3 = and i64 %1, 63
   %4 = shl i64 %0, %3
@@ -698,12 +694,8 @@
 
 define i64 @rotr_sub_mask(i64 %0, i64 %1) {
 ; CHECK-LABEL: @rotr_sub_mask(
-; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1:%.*]], 63
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i64 [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw i64 64, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.fshr.i64(i64 [[TMP0:%.*]], i64 [[TMP0]], i64 [[TMP1:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %3 = and i64 %1, 63
   %4 = lshr i64 %0, %3
@@ -715,12 +707,8 @@
 
 define <2 x i64> @rotr_sub_mask_vector(<2 x i64> %0, <2 x i64> %1) {
 ; CHECK-LABEL: @rotr_sub_mask_vector(
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP1:%.*]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i64> [[TMP0:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <2 x i64> <i64 64, i64 64>, [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl <2 x i64> [[TMP0]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i64> [[TMP6]], [[TMP4]]
-; CHECK-NEXT:    ret <2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> [[TMP0:%.*]], <2 x i64> [[TMP0]], <2 x i64> [[TMP1:%.*]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %3 = and <2 x i64> %1, <i64 63, i64 63>
   %4 = lshr <2 x i64> %0, %3
Index: llvm/lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -359,6 +359,7 @@
 
   Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
   Instruction *foldFPSignBitOps(BinaryOperator &I);
+  Instruction *matchRotate(BinaryOperator &Or);
 
 public:
   /// Inserts an instruction \p New before instruction \p Old
Index: llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2072,7 +2072,7 @@
 }
 
 /// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
-static Instruction *matchRotate(Instruction &Or) {
+Instruction *InstCombinerImpl::matchRotate(BinaryOperator &Or) {
   // TODO: Can we reduce the code duplication between this and the related
   // rotate matching code under visitSelect and visitTrunc?
   unsigned Width = Or.getType()->getScalarSizeInBits();
@@ -2096,7 +2096,7 @@
 
   // Match the shift amount operands for a rotate pattern. This always matches
   // a subtraction on the R operand.
-  auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+  auto matchShiftAmount = [this, &Or](Value *L, Value *R, unsigned Width) -> Value * {
     // Check for constant shift amounts that sum to the bitwidth.
     // TODO: Support non-uniform shift amounts.
     const APInt *LC, *RC;
@@ -2104,6 +2104,12 @@
       if (LC->ult(Width) && RC->ult(Width) && (*LC + *RC) == Width)
         return ConstantInt::get(L->getType(), *LC);
 
+    // (shl ShVal, X) | (lshr ShVal, (Width - X)) iff X < Width
+    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L))))) {
+      KnownBits KnownL = computeKnownBits(L, /*Depth*/ 0, &Or);
+      return KnownL.getMaxValue().ult(Width) ? L : nullptr;
+    }
+
     // For non-constant cases we don't support non-pow2 shift masks.
     // TODO: Is it worth matching urem as well?
     if (!isPowerOf2_32(Width))


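For contrast, a hypothetical case where the new guard keeps the pattern as-is (the function name @not_a_rotate is illustrative and not part of this patch): with no mask on the shift amount, computeKnownBits cannot prove it is below the bit width, so matchShiftAmount returns nullptr and the or/shl/lshr sequence is left untouched:

  define i64 @not_a_rotate(i64 %v, i64 %amt) {
    %shl = shl i64 %v, %amt             ; %amt is not known to be < 64
    %sub = sub i64 64, %amt
    %shr = lshr i64 %v, %sub
    %or = or i64 %shl, %shr
    ret i64 %or
  }

Note that @llvm.fshl takes its shift amount modulo the bit width, while shl/lshr by an out-of-range amount yield poison, which is why the fold is restricted to amounts that are provably less than the bit width.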
