[llvm] 88c5b50 - [AggressiveInstCombine] Generalize foldGuardedRotateToFunnelShift to generic funnel shifts (REAPPLIED)

Mon Dec 21 07:22:54 PST 2020

Author: Simon Pilgrim
Date: 2020-12-21T15:22:27Z
New Revision: 88c5b5006064d62cae4592e66f5bc8b7a7326ef2

URL: https://github.com/llvm/llvm-project/commit/88c5b5006064d62cae4592e66f5bc8b7a7326ef2
DIFF: https://github.com/llvm/llvm-project/commit/88c5b5006064d62cae4592e66f5bc8b7a7326ef2.diff

LOG: [AggressiveInstCombine] Generalize foldGuardedRotateToFunnelShift to generic funnel shifts (REAPPLIED)

The fold currently only handles rotation patterns, but with the maturation of backend funnel shift handling we can now realistically handle all funnel shift patterns.

This should allow us to begin resolving PR46896 et al.

Ensure we block poison in a funnel shift value - similar to rG0fe91ad463fea9d08cbcd640a62aa9ca2d8d05e0

Reapplied with fix for PR48068 - we weren't checking that the shift values could be hoisted from their basicblocks.

Differential Revision: https://reviews.llvm.org/D90625

Added: 
    

Modified: 
    llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
    llvm/test/Transforms/AggressiveInstCombine/funnel.ll
    llvm/test/Transforms/AggressiveInstCombine/rotate.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index e7fb699d9fda..a7ae10d156d5 100644

--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
@@ -39,6 +40,8 @@ using namespace PatternMatch;
 STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
 STATISTIC(NumGuardedRotates,
           "Number of guarded rotates transformed into funnel shifts");
+STATISTIC(NumGuardedFunnelShifts,
+          "Number of guarded funnel shifts transformed into funnel shifts");
 STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
 
 namespace {
@@ -67,17 +70,17 @@ class AggressiveInstCombinerLegacyPass : public FunctionPass {
 };
 } // namespace
 
-/// Match a pattern for a bitwise rotate operation that partially guards
-/// against undefined behavior by branching around the rotation when the shift
-/// amount is 0.
-static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+/// Match a pattern for a bitwise funnel/rotate operation that partially guards
+/// against undefined behavior by branching around the funnel-shift/rotation
+/// when the shift amount is 0.
+static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
   if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
     return false;
 
   // As with the one-use checks below, this is not strictly necessary, but we
   // are being cautious to avoid potential perf regressions on targets that
-  // do not actually have a rotate instruction (where the funnel shift would be
-  // expanded back into math/shift/logic ops).
+  // do not actually have a funnel/rotate instruction (where the funnel shift
+  // would be expanded back into math/shift/logic ops).
   if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
     return false;
 
@@ -111,30 +114,41 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
     return Intrinsic::not_intrinsic;
   };
 
-  // One phi operand must be a rotate operation, and the other phi operand must
-  // be the source value of that rotate operation:
+  // One phi operand must be a funnel/rotate operation, and the other phi
+  // operand must be the source value of that funnel/rotate operation:
   // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
+  // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
+  // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
   PHINode &Phi = cast<PHINode>(I);
   unsigned FunnelOp = 0, GuardOp = 1;
   Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
   Value *ShVal0, *ShVal1, *ShAmt;
   Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
-  if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P1) {
+  if (IID == Intrinsic::not_intrinsic ||
+      (IID == Intrinsic::fshl && ShVal0 != P1) ||
+      (IID == Intrinsic::fshr && ShVal1 != P1)) {
     IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
-    if (IID == Intrinsic::not_intrinsic || ShVal0 != ShVal1 || ShVal0 != P0)
+    if (IID == Intrinsic::not_intrinsic ||
+        (IID == Intrinsic::fshl && ShVal0 != P0) ||
+        (IID == Intrinsic::fshr && ShVal1 != P0))
       return false;
     assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
            "Pattern must match funnel shift left or right");
     std::swap(FunnelOp, GuardOp);
   }
-  assert(ShVal0 == ShVal1 && "Rotation funnel shift pattern expected");
 
   // The incoming block with our source operand must be the "guard" block.
-  // That must contain a cmp+branch to avoid the rotate when the shift amount
-  // is equal to 0. The other incoming block is the block with the rotate.
+  // That must contain a cmp+branch to avoid the funnel/rotate when the shift
+  // amount is equal to 0. The other incoming block is the block with the
+  // funnel/rotate.
   BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
   BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
   Instruction *TermI = GuardBB->getTerminator();
+
+  // Ensure that the shift values dominate each block.
+  if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
+    return false;
+
   ICmpInst::Predicate Pred;
   BasicBlock *PhiBB = Phi.getParent();
   if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()),
@@ -144,24 +158,39 @@ static bool foldGuardedRotateToFunnelShift(Instruction &I) {
   if (Pred != CmpInst::ICMP_EQ)
     return false;
 
+  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+
+  if (ShVal0 == ShVal1)
+    ++NumGuardedRotates;
+  else
+    ++NumGuardedFunnelShifts;
+
+  // If this is not a rotate then the select was blocking poison from the
+  // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
+  bool IsFshl = IID == Intrinsic::fshl;
+  if (ShVal0 != ShVal1) {
+    if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
+      ShVal1 = Builder.CreateFreeze(ShVal1);
+    else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
+      ShVal0 = Builder.CreateFreeze(ShVal0);
+  }
+
   // We matched a variation of this IR pattern:
   // GuardBB:
   //   %cmp = icmp eq i32 %ShAmt, 0
   //   br i1 %cmp, label %PhiBB, label %FunnelBB
   // FunnelBB:
   //   %sub = sub i32 32, %ShAmt
-  //   %shr = lshr i32 %RotSrc, %sub
-  //   %shl = shl i32 %RotSrc, %ShAmt
-  //   %rot = or i32 %shr, %shl
+  //   %shr = lshr i32 %ShVal1, %sub
+  //   %shl = shl i32 %ShVal0, %ShAmt
+  //   %fsh = or i32 %shr, %shl
   //   br label %PhiBB
   // PhiBB:
-  //   %cond = phi i32 [ %RotSrc, %FunnelBB ], [ %RotSrc, %GuardBB ]
+  //   %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
   // -->
-  // llvm.fshl.i32(i32 %RotSrc, i32 %RotSrc, i32 %ShAmt)
-  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+  // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
   Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
   Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
-  ++NumGuardedRotates;
   return true;
 }
 
@@ -350,7 +379,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
     // iteratively in this loop rather than waiting until the end.
     for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
       MadeChange |= foldAnyOrAllBitsSet(I);
-      MadeChange |= foldGuardedRotateToFunnelShift(I);
+      MadeChange |= foldGuardedFunnelShift(I, DT);
       MadeChange |= tryToRecognizePopCount(I); 
     }
   }

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
index 545b8e175982..1322e95920d2 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/funnel.ll
@@ -7,14 +7,11 @@ define i32 @fshl(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -38,14 +35,11 @@ define i32 @fshl_commute_phi(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -69,14 +63,11 @@ define i32 @fshl_commute_or(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -102,15 +93,12 @@ define i32 @fshl_insert_valid_location(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT:    [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
+; CHECK-NEXT:    [[OTHER:%.*]] = phi i32 [ 1, [[FSHBB]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    [[RES:%.*]] = or i32 [[TMP1]], [[OTHER]]
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
 entry:
@@ -137,14 +125,11 @@ define i32 @fshr(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[OR]], [[FSHBB]] ], [ [[B]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -168,14 +153,11 @@ define i32 @fshr_commute_phi(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -199,14 +181,11 @@ define i32 @fshr_commute_or(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -396,7 +375,7 @@ end:
   ret i32 %cond
 }
 
-; Negative test - wrong shift.
+; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
 
 define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @not_fshr_5(
@@ -404,14 +383,11 @@ define i32 @not_fshr_5(i32 %a, i32 %b, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[FSHBB:%.*]]
 ; CHECK:       fshbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[C]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[C]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[B:%.*]], [[C]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[FSHBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[C]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[B:%.*]], i32 [[C]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %c, 0
@@ -500,3 +476,45 @@ end:
   ret i32 %cond
 }
 
+; PR48068 - Ensure we don't fold a funnel shift that depends on a shift value that
+; can't be hoisted out of a basic block.
+ at a = global i32 0, align 4
+declare i32 @i(...)
+declare i32 @f(...)
+
+define i32 @PR48068() {
+; CHECK-LABEL: @PR48068(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 bitcast (i32 (...)* @i to i32 ()*)()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[CALL]], [[TMP0]]
+; CHECK-NEXT:    [[CALL_I:%.*]] = call i32 bitcast (i32 (...)* @f to i32 ()*)()
+; CHECK-NEXT:    [[SUB_I:%.*]] = sub nsw i32 32, [[TMP0]]
+; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i32 [[CALL_I]], [[SUB_I]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR_I]]
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[H_0:%.*]] = phi i32 [ [[OR]], [[IF_THEN]] ], [ [[CALL]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[H_0]]
+;
+entry:
+  %call = call i32 bitcast (i32 (...)* @i to i32 ()*)()
+  %0 = load i32, i32* @a, align 4
+  %tobool.not = icmp eq i32 %0, 0
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %shl = shl i32 %call, %0
+  %call.i = call i32 bitcast (i32 (...)* @f to i32 ()*)()
+  %sub.i = sub nsw i32 32, %0
+  %shr.i = lshr i32 %call.i, %sub.i
+  %or = or i32 %shl, %shr.i
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %h.0 = phi i32 [ %or, %if.then ], [ %call, %entry ]
+  ret i32 %h.0
+}

diff  --git a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
index e47fa9be6f83..0abc6c574603 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/rotate.ll
@@ -370,7 +370,7 @@ end:
   ret i32 %cond
 }
 
-; Negative test - wrong shift.
+; Negative test - wrong shift for rotate (but can be folded to a generic funnel shift).
 
 define i32 @not_rotr_5(i32 %a, i32 %b) {
 ; CHECK-LABEL: @not_rotr_5(
@@ -378,14 +378,11 @@ define i32 @not_rotr_5(i32 %a, i32 %b) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
 ; CHECK:       rotbb:
-; CHECK-NEXT:    [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT:    ret i32 [[COND]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshr.i32(i32 [[TMP0]], i32 [[A:%.*]], i32 [[B]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
   %cmp = icmp eq i32 %b, 0