[llvm] r350199 - [InstCombine] canonicalize raw IR rotate patterns to funnel shift

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 1 13:51:39 PST 2019


Author: spatel
Date: Tue Jan  1 13:51:39 2019
New Revision: 350199

URL: http://llvm.org/viewvc/llvm-project?rev=350199&view=rev
Log:
[InstCombine] canonicalize raw IR rotate patterns to funnel shift

The final piece of IR-level analysis to allow this was committed with:
rL350188

Using the intrinsics should improve transforms that rely on cost
models, such as vectorization and inlining.

The backend should be prepared too, so we can now canonicalize more
sequences of shift/logic to the intrinsics and know that the end
result should be equal to or better than the original code even if
the target does not have an actual rotate instruction.
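
As an illustration (my sketch, not part of the commit), this is the shape
of the canonicalization on the i32 rotate-right pattern exercised by the
tests below:

  ; before: compare+select rotate that guards against shift-by-bitwidth UB
  %cmp = icmp eq i32 %shamt, 0
  %sub = sub i32 32, %shamt
  %shr = lshr i32 %x, %shamt
  %shl = shl i32 %x, %sub
  %or = or i32 %shl, %shr
  %r = select i1 %cmp, i32 %x, i32 %or

  ; after: a single funnel-shift intrinsic call
  %r = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %shamt)

The select is required in the raw IR because a shift by the full bit
width would be poison; llvm.fshr takes its shift amount modulo the bit
width, so the zero-amount guard folds away.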

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/rotate.ll
    llvm/trunk/test/Transforms/PhaseOrdering/rotate.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=350199&r1=350198&r2=350199&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Tue Jan  1 13:51:39 2019
@@ -1547,9 +1547,9 @@ static Instruction *factorizeMinMaxTree(
 }
 
 /// Try to reduce a rotate pattern that includes a compare and select into a
-/// sequence of ALU ops only. Example:
+/// funnel shift intrinsic. Example:
 /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
-///              --> (a >> (-b & 31)) | (a << (b & 31))
+///              --> call llvm.fshl.i32(a, a, b)
 static Instruction *foldSelectRotate(SelectInst &Sel,
                                      InstCombiner::BuilderTy &Builder) {
   // The false value of the select must be a rotate of the true value.
@@ -1593,17 +1593,12 @@ static Instruction *foldSelectRotate(Sel
     return nullptr;
 
   // This is a rotate that avoids shift-by-bitwidth UB in a suboptimal way.
-  // Convert to safely bitmasked shifts.
-  // TODO: When we can canonicalize to funnel shift intrinsics without risk of
-  // performance regressions, replace this sequence with that call.
-  Value *NegShAmt = Builder.CreateNeg(ShAmt);
-  Value *MaskedShAmt = Builder.CreateAnd(ShAmt, Width - 1);
-  Value *MaskedNegShAmt = Builder.CreateAnd(NegShAmt, Width - 1);
-  Value *NewSA0 = ShAmt == SA0 ? MaskedShAmt : MaskedNegShAmt;
-  Value *NewSA1 = ShAmt == SA1 ? MaskedShAmt : MaskedNegShAmt;
-  Value *NewSh0 = Builder.CreateBinOp(ShiftOpcode0, TVal, NewSA0);
-  Value *NewSh1 = Builder.CreateBinOp(ShiftOpcode1, TVal, NewSA1);
-  return BinaryOperator::CreateOr(NewSh0, NewSh1);
+  // Convert to funnel shift intrinsic.
+  bool IsFshl = (ShAmt == SA0 && ShiftOpcode0 == BinaryOperator::Shl) ||
+                (ShAmt == SA1 && ShiftOpcode1 == BinaryOperator::Shl);
+  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+  Function *F = Intrinsic::getDeclaration(Sel.getModule(), IID, Sel.getType());
+  return IntrinsicInst::Create(F, { TVal, TVal, ShAmt });
 }
 
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {

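A note on the IsFshl computation above (my gloss, not from the commit):
the shift that consumes the compared amount directly, rather than its
negation, fixes the rotate direction. With both data operands equal, the
funnel-shift intrinsics reduce to rotates:

  ; rotl: (x << b) | (x >> (32 - b)), guarded by b == 0
  %rotl = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %b)
  ; rotr: (x >> b) | (x << (32 - b)), guarded by b == 0
  %rotr = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %b)

So if the unnegated amount feeds the shl, the pattern is a left rotate
and fshl is chosen; otherwise fshr.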
Modified: llvm/trunk/test/Transforms/InstCombine/rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/rotate.ll?rev=350199&r1=350198&r2=350199&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/rotate.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/rotate.ll Tue Jan  1 13:51:39 2019
@@ -700,12 +700,7 @@ define i9 @rotateleft_9_neg_mask_wide_am
 
 define i32 @rotr_select(i32 %x, i32 %shamt) {
 ; CHECK-LABEL: @rotr_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[SHAMT]], 31
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP1]], 31
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i32 [[X:%.*]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i32 [[X]], [[TMP3]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %cmp = icmp eq i32 %shamt, 0
@@ -721,12 +716,7 @@ define i32 @rotr_select(i32 %x, i32 %sha
 
 define i8 @rotr_select_commute(i8 %x, i8 %shamt) {
 ; CHECK-LABEL: @rotr_select_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[SHAMT]], 7
-; CHECK-NEXT:    [[TMP3:%.*]] = and i8 [[TMP1]], 7
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i8 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr i8 [[X]], [[TMP2]]
-; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %cmp = icmp eq i8 %shamt, 0
@@ -742,12 +732,7 @@ define i8 @rotr_select_commute(i8 %x, i8
 
 define i16 @rotl_select(i16 %x, i16 %shamt) {
 ; CHECK-LABEL: @rotl_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 0, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[SHAMT]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP1]], 15
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr i16 [[X:%.*]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i16 [[X]], [[TMP2]]
-; CHECK-NEXT:    [[R:%.*]] = or i16 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %cmp = icmp eq i16 %shamt, 0
@@ -763,12 +748,7 @@ define i16 @rotl_select(i16 %x, i16 %sha
 
 define <2 x i64> @rotl_select_commute(<2 x i64> %x, <2 x i64> %shamt) {
 ; CHECK-LABEL: @rotl_select_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i64> zeroinitializer, [[SHAMT:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[SHAMT]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP1]], <i64 63, i64 63>
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[X:%.*]], [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <2 x i64> [[X]], [[TMP3]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i64> [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[X:%.*]], <2 x i64> [[X]], <2 x i64> [[SHAMT:%.*]])
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %cmp = icmp eq <2 x i64> %shamt, zeroinitializer

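As a quick sanity check on the rewritten CHECK lines (a worked example of
my own, not part of the tests): with both data operands equal, fshr is a
rotate right, so shifting i8 0x12 by 4 swaps the nibbles:

  %r = call i8 @llvm.fshr.i8(i8 18, i8 18, i8 4)  ; 0x12 -> 0x21, %r = 33
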
Modified: llvm/trunk/test/Transforms/PhaseOrdering/rotate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/PhaseOrdering/rotate.ll?rev=350199&r1=350198&r2=350199&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/PhaseOrdering/rotate.ll (original)
+++ llvm/trunk/test/Transforms/PhaseOrdering/rotate.ll Tue Jan  1 13:51:39 2019
@@ -5,6 +5,9 @@
 ; This should become a single funnel shift through a combination
 ; of aggressive-instcombine, simplifycfg, and instcombine.
 ; https://bugs.llvm.org/show_bug.cgi?id=34924
+; These are equivalent, but the value name with the new-pm shows a bug -
+; this code should not have been converted to a speculative select with
+; an intermediate transform.
 
 define i32 @rotl(i32 %a, i32 %b) {
 ; OLDPM-LABEL: @rotl(
@@ -14,12 +17,7 @@ define i32 @rotl(i32 %a, i32 %b) {
 ;
 ; NEWPM-LABEL: @rotl(
 ; NEWPM-NEXT:  entry:
-; NEWPM-NEXT:    [[TMP0:%.*]] = sub i32 0, [[B:%.*]]
-; NEWPM-NEXT:    [[TMP1:%.*]] = and i32 [[B]], 31
-; NEWPM-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0]], 31
-; NEWPM-NEXT:    [[TMP3:%.*]] = lshr i32 [[A:%.*]], [[TMP2]]
-; NEWPM-NEXT:    [[TMP4:%.*]] = shl i32 [[A]], [[TMP1]]
-; NEWPM-NEXT:    [[SPEC_SELECT:%.*]] = or i32 [[TMP3]], [[TMP4]]
+; NEWPM-NEXT:    [[SPEC_SELECT:%.*]] = tail call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B:%.*]])
 ; NEWPM-NEXT:    ret i32 [[SPEC_SELECT]]
 ;
 entry:
