[llvm] r335984 - [InstCombine] add more tests for shuffle-binop folds; NFC

Fri Jun 29 08:28:11 PDT 2018

Author: spatel
Date: Fri Jun 29 08:28:11 2018
New Revision: 335984

URL: http://llvm.org/viewvc/llvm-project?rev=335984&view=rev
Log:
[InstCombine] add more tests for shuffle-binop folds; NFC

The mul+shl tests add coverage for the fold enabled with D48678.
The and+or tests are not handled yet; that's D48662.

Modified:
    llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll

Modified: llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll?rev=335984&r1=335983&r2=335984&view=diff
==============================================================================

--- llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/shuffle_select.ll Fri Jun 29 08:28:11 2018
@@ -521,7 +521,6 @@ define <4 x i32> @shl_mul(<4 x i32> %v0)
   ret <4 x i32> %t3
 }
 
-; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806
 ; Demanded elements + simplification can remove the mul alone, but that's not the best case.
 
 define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
@@ -550,6 +549,32 @@ define <4 x i32> @shl_mul_not_constant_s
   ret <4 x i32> %t3
 }
 
+; Try with 2 variable inputs.
+
+define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @mul_shl_2_vars(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x i32> %t3
+}
+
+define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
+; CHECK-LABEL: @shl_mul_2_vars(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
+  %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+  ret <4 x i32> %t3
+}
+
 ; Or with constant can be converted to add to enable the fold.
 ; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
 ; TODO: The 'or' constant is limited to a splat.
@@ -585,4 +610,51 @@ define <4 x i8> @or_add(<4 x i8> %v) {
   %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x i8> %t3
 }
+
+define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
+; CHECK-LABEL: @or_add_not_enough_masking(
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 undef, i8 undef, i8 -64, i8 -64>
+; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 undef, i8 undef>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T1]], <4 x i8> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i8> [[T3]]
+;
+  %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1>          ; clear not enough top bits
+  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
+  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
+  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x i8> %t3
+}
+
+; Try with 2 variable inputs.
+
+define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
+; CHECK-LABEL: @add_or_2_vars(
+; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[V1:%.*]], <i32 undef, i32 undef, i32 65536, i32 65537>
+; CHECK-NEXT:    [[T2:%.*]] = or <4 x i32> [[V0]], <i32 31, i32 31, i32 undef, i32 undef>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i32> [[T3]]
+;
+  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
+  %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
+  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
+  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x i32> %t3
+}
+
+define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
+; CHECK-LABEL: @or_add_2_vars(
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 undef, i8 undef, i8 -64, i8 -64>
+; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V1:%.*]], <i8 1, i8 2, i8 undef, i8 undef>
+; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T1]], <4 x i8> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x i8> [[T3]]
+;
+  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
+  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
+  %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
+  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x i8> %t3
+}