[llvm] r346960 - [InstSimplify] delete shift-of-zero guard ops around funnel shifts

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 15 06:53:37 PST 2018


Author: spatel
Date: Thu Nov 15 06:53:37 2018
New Revision: 346960

URL: http://llvm.org/viewvc/llvm-project?rev=346960&view=rev
Log:
[InstSimplify] delete shift-of-zero guard ops around funnel shifts

This is a problem seen in common rotate idioms as noted in:
https://bugs.llvm.org/show_bug.cgi?id=34924

Note that we are not canonicalizing standard IR (shifts and logic) to the intrinsics yet. 
(Although I've written this before...) I think this is the last step before we enable 
that transform. Ie, we could regress code by doing that transform without this 
simplification in place.

In PR34924, I questioned whether this is a valid transform for target-independent IR, 
but I convinced myself this is ok. If we're speculating a funnel shift by turning cmp+br 
into select, then SimplifyCFG has already determined that the transform is justified. 
It's possible that SimplifyCFG is not taking into account profile or other metadata, 
but if that's true, then it's a bug independent of funnel shifts.

Also, we do have CGP code to restore a guard like this around an intrinsic if it can't 
be lowered cheaply. But that isn't necessary for funnel shift because the default 
expansion in SelectionDAGBuilder includes this same cmp+select.

Differential Revision: https://reviews.llvm.org/D54552

Modified:
    llvm/trunk/lib/Analysis/InstructionSimplify.cpp
    llvm/trunk/test/Transforms/InstSimplify/call.ll

Modified: llvm/trunk/lib/Analysis/InstructionSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InstructionSimplify.cpp?rev=346960&r1=346959&r2=346960&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InstructionSimplify.cpp (original)
+++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp Thu Nov 15 06:53:37 2018
@@ -3837,6 +3837,28 @@ static Value *simplifySelectWithICmpCond
       if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
                                            Pred == ICmpInst::ICMP_EQ))
         return V;
+
+    // Test for zero-shift-guard-ops around funnel shifts. These are used to
+    // avoid UB from oversized shifts in raw IR rotate patterns, but the
+    // intrinsics do not have that problem.
+    Value *ShAmt;
+    auto isFsh = m_CombineOr(m_Intrinsic<Intrinsic::fshl>(m_Value(X), m_Value(),
+                                                          m_Value(ShAmt)),
+                             m_Intrinsic<Intrinsic::fshr>(m_Value(), m_Value(X),
+                                                          m_Value(ShAmt)));
+    // (ShAmt != 0) ? fshl(X, *, ShAmt) : X --> fshl(X, *, ShAmt)
+    // (ShAmt != 0) ? fshr(*, X, ShAmt) : X --> fshr(*, X, ShAmt)
+    // (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
+    // (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
+    if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
+      return Pred == ICmpInst::ICMP_NE ? TrueVal : X;
+
+    // (ShAmt == 0) ? X : fshl(X, *, ShAmt) --> fshl(X, *, ShAmt)
+    // (ShAmt == 0) ? X : fshr(*, X, ShAmt) --> fshr(*, X, ShAmt)
+    // (ShAmt != 0) ? X : fshl(X, *, ShAmt) --> X
+    // (ShAmt != 0) ? X : fshr(*, X, ShAmt) --> X
+    if (match(FalseVal, isFsh) && TrueVal == X && CmpLHS == ShAmt)
+      return Pred == ICmpInst::ICMP_EQ ? FalseVal : X;
   }
 
   // Check for other compares that behave like bit test.

Modified: llvm/trunk/test/Transforms/InstSimplify/call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstSimplify/call.ll?rev=346960&r1=346959&r2=346960&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstSimplify/call.ll (original)
+++ llvm/trunk/test/Transforms/InstSimplify/call.ll Thu Nov 15 06:53:37 2018
@@ -500,12 +500,12 @@ define <2 x i8> @fshr_no_shift_modulo_bi
   ret <2 x i8> %z
 }
 
+; When the shift amount is 0, fshl returns its 1st parameter (x), so the guard is not needed.
+
 define i8 @fshl_zero_shift_guard(i8 %x, i8 %y, i8 %sh) {
 ; CHECK-LABEL: @fshl_zero_shift_guard(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[F]]
-; CHECK-NEXT:    ret i8 [[S]]
+; CHECK-NEXT:    [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[SH:%.*]])
+; CHECK-NEXT:    ret i8 [[F]]
 ;
   %c = icmp eq i8 %sh, 0
   %f = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %sh)
@@ -513,12 +513,12 @@ define i8 @fshl_zero_shift_guard(i8 %x,
   ret i8 %s
 }
 
+; When the shift amount is 0, fshl returns its 1st parameter (x), so the guard is not needed.
+
 define i8 @fshl_zero_shift_guard_swapped(i8 %x, i8 %y, i8 %sh) {
 ; CHECK-LABEL: @fshl_zero_shift_guard_swapped(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 [[F]], i8 [[X]]
-; CHECK-NEXT:    ret i8 [[S]]
+; CHECK-NEXT:    [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[SH:%.*]])
+; CHECK-NEXT:    ret i8 [[F]]
 ;
   %c = icmp ne i8 %sh, 0
   %f = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %sh)
@@ -526,12 +526,11 @@ define i8 @fshl_zero_shift_guard_swapped
   ret i8 %s
 }
 
+; When the shift amount is 0, fshl returns its 1st parameter (x), so everything is deleted.
+
 define i8 @fshl_zero_shift_guard_inverted(i8 %x, i8 %y, i8 %sh) {
 ; CHECK-LABEL: @fshl_zero_shift_guard_inverted(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 [[F]], i8 [[X]]
-; CHECK-NEXT:    ret i8 [[S]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %c = icmp eq i8 %sh, 0
   %f = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %sh)
@@ -539,12 +538,11 @@ define i8 @fshl_zero_shift_guard_inverte
   ret i8 %s
 }
 
+; When the shift amount is 0, fshl returns its 1st parameter (x), so everything is deleted.
+
 define i8 @fshl_zero_shift_guard_inverted_swapped(i8 %x, i8 %y, i8 %sh) {
 ; CHECK-LABEL: @fshl_zero_shift_guard_inverted_swapped(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[Y:%.*]], i8 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[F]]
-; CHECK-NEXT:    ret i8 [[S]]
+; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
   %c = icmp ne i8 %sh, 0
   %f = call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %sh)
@@ -552,12 +550,12 @@ define i8 @fshl_zero_shift_guard_inverte
   ret i8 %s
 }
 
+; When the shift amount is 0, fshr returns its 2nd parameter (y), so the guard is not needed.
+
 define i9 @fshr_zero_shift_guard(i9 %x, i9 %y, i9 %sh) {
 ; CHECK-LABEL: @fshr_zero_shift_guard(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i9 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i9 @llvm.fshr.i9(i9 [[X:%.*]], i9 [[Y:%.*]], i9 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i9 [[Y]], i9 [[F]]
-; CHECK-NEXT:    ret i9 [[S]]
+; CHECK-NEXT:    [[F:%.*]] = call i9 @llvm.fshr.i9(i9 [[X:%.*]], i9 [[Y:%.*]], i9 [[SH:%.*]])
+; CHECK-NEXT:    ret i9 [[F]]
 ;
   %c = icmp eq i9 %sh, 0
   %f = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 %sh)
@@ -565,12 +563,12 @@ define i9 @fshr_zero_shift_guard(i9 %x,
   ret i9 %s
 }
 
+; When the shift amount is 0, fshr returns its 2nd parameter (y), so the guard is not needed.
+
 define i9 @fshr_zero_shift_guard_swapped(i9 %x, i9 %y, i9 %sh) {
 ; CHECK-LABEL: @fshr_zero_shift_guard_swapped(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i9 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i9 @llvm.fshr.i9(i9 [[X:%.*]], i9 [[Y:%.*]], i9 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i9 [[F]], i9 [[Y]]
-; CHECK-NEXT:    ret i9 [[S]]
+; CHECK-NEXT:    [[F:%.*]] = call i9 @llvm.fshr.i9(i9 [[X:%.*]], i9 [[Y:%.*]], i9 [[SH:%.*]])
+; CHECK-NEXT:    ret i9 [[F]]
 ;
   %c = icmp ne i9 %sh, 0
   %f = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 %sh)
@@ -578,12 +576,11 @@ define i9 @fshr_zero_shift_guard_swapped
   ret i9 %s
 }
 
+; When the shift amount is 0, fshr returns its 2nd parameter (y), so everything is deleted.
+
 define i9 @fshr_zero_shift_guard_inverted(i9 %x, i9 %y, i9 %sh) {
 ; CHECK-LABEL: @fshr_zero_shift_guard_inverted(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq i9 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i9 @llvm.fshr.i9(i9 [[X:%.*]], i9 [[Y:%.*]], i9 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i9 [[F]], i9 [[Y]]
-; CHECK-NEXT:    ret i9 [[S]]
+; CHECK-NEXT:    ret i9 [[Y:%.*]]
 ;
   %c = icmp eq i9 %sh, 0
   %f = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 %sh)
@@ -591,12 +588,11 @@ define i9 @fshr_zero_shift_guard_inverte
   ret i9 %s
 }
 
+; When the shift amount is 0, fshr returns its 2nd parameter (y), so everything is deleted.
+
 define i9 @fshr_zero_shift_guard_inverted_swapped(i9 %x, i9 %y, i9 %sh) {
 ; CHECK-LABEL: @fshr_zero_shift_guard_inverted_swapped(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i9 [[SH:%.*]], 0
-; CHECK-NEXT:    [[F:%.*]] = call i9 @llvm.fshr.i9(i9 [[X:%.*]], i9 [[Y:%.*]], i9 [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i9 [[Y]], i9 [[F]]
-; CHECK-NEXT:    ret i9 [[S]]
+; CHECK-NEXT:    ret i9 [[Y:%.*]]
 ;
   %c = icmp ne i9 %sh, 0
   %f = call i9 @llvm.fshr.i9(i9 %x, i9 %y, i9 %sh)
@@ -604,6 +600,8 @@ define i9 @fshr_zero_shift_guard_inverte
   ret i9 %s
 }
 
+; Negative test - make sure we're matching the correct parameter of fshl.
+
 define i8 @fshl_zero_shift_guard_wrong_select_op(i8 %x, i8 %y, i8 %sh) {
 ; CHECK-LABEL: @fshl_zero_shift_guard_wrong_select_op(
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[SH:%.*]], 0
@@ -617,12 +615,12 @@ define i8 @fshl_zero_shift_guard_wrong_s
   ret i8 %s
 }
 
+; Vector types work too.
+
 define <2 x i8> @fshr_zero_shift_guard_splat(<2 x i8> %x, <2 x i8> %y, <2 x i8> %sh) {
 ; CHECK-LABEL: @fshr_zero_shift_guard_splat(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[SH:%.*]], zeroinitializer
-; CHECK-NEXT:    [[F:%.*]] = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i8> [[SH]])
-; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[Y]], <2 x i8> [[F]]
-; CHECK-NEXT:    ret <2 x i8> [[S]]
+; CHECK-NEXT:    [[F:%.*]] = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i8> [[SH:%.*]])
+; CHECK-NEXT:    ret <2 x i8> [[F]]
 ;
   %c = icmp eq <2 x i8> %sh, zeroinitializer
   %f = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %sh)




More information about the llvm-commits mailing list