[PATCH] D64275: [InstCombine] Generalize InstCombiner::foldAndOrOfICmpsOfAndWithPow2().

Huihui Zhang via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 12 16:10:47 PDT 2019


huihuiz marked an inline comment as done.
huihuiz added inline comments.


================
Comment at: llvm/test/Transforms/InstCombine/onehot_merge.ll:340
 
 ; Should not fold
 define i1 @foo1_and_extra_use_cmp2(i32 %k, i32 %c1, i32 %c2, i1* %p) {
----------------
lebedev.ri wrote:
> huihuiz wrote:
> > lebedev.ri wrote:
> > > Could fold, no instruction count increase
> > We should not fold when 'cmp' has more than one use; doing so increases the instruction count when one side is using the new form.
> > 
> > Take this as an example: we need to keep the extra 'cmp' that can't be reused, while the benefit of folding is offset by the added signbit shift instruction.
> > 
> > ```
> > define i1 @foo(i32 %k, i32 %c1, i32 %c2, i1* %p) {
> >   %t0 = shl i32 1, %c1
> >   %t1 = and i32 %t0, %k
> >   %t2 = icmp eq i32 %t1, 0
> >   store i1 %t2, i1* %p  ; extra use of cmp
> >   %t3 = shl i32 %k, %c2
> >   %t4 = icmp sgt i32 %t3, -1
> >   %or = or i1 %t2, %t4
> >   ret i1 %or
> > }
> > 
> > ```
> I believe i'm missing the point in `@foo1_and_extra_use_cmp2`.
> We were doing this fold previously (in llvm trunk), now we don't because of use-count.
> But this restriction does not seem to improve anything?
> We had 8 instructions, and we have 8 instructions without the fold?
> 
This restriction doesn't improve anything by itself, but it makes sure we don't increase the instruction count in certain cases.

If the 'cmp' has an extra use and one side is using the new form, e.g. @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp1/2 (please expand the test file to see them), then without this restriction the instruction count actually increases, as the sketch below illustrates.
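For reference, here is a rough sketch (the value names %m, %bits, %masked, %res are made up for illustration, not taken from the test file) of what the fold would produce for the quoted @foo example above, consistent with the diff further down. The original 'and'/'icmp' chain has to stay alive because of the store, and the merged form adds a signbit 'lshr', so the count goes from 7 to 8 instructions (not counting the 'ret'):

```
define i1 @foo(i32 %k, i32 %c1, i32 %c2, i1* %p) {
  %t0 = shl i32 1, %c1
  %t1 = and i32 %t0, %k
  %t2 = icmp eq i32 %t1, 0
  store i1 %t2, i1* %p               ; extra use keeps %t1/%t2 alive
  %m = lshr i32 -2147483648, %c2     ; signbit mask replacing the old shl/sgt pair
  %bits = or i32 %t0, %m
  %masked = and i32 %bits, %k
  %res = icmp ne i32 %masked, %bits  ; "not all selected bits set", i.e. %t2 | %t4
  ret i1 %res
}
```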

If the 'cmp' has an extra use and both sides are using the old form, e.g. @foo1_and_extra_use_cmp, we have 8 instructions with and without the fold.

Here is the diff without this restriction: 
```
--- a/llvm/test/Transforms/InstCombine/onehot_merge.ll
+++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll
@@ -274,10 +274,10 @@ define i1 @foo1_and_extra_use_cmp(i32 %k, i32 %c1, i32 %c2, i1* %p) {
 ; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[K:%.*]]
 ; CHECK-NEXT:    [[T3:%.*]] = icmp eq i32 [[T2]], 0
 ; CHECK-NEXT:    store i1 [[T3]], i1* [[P:%.*]], align 1
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[K]]
-; CHECK-NEXT:    [[T5:%.*]] = icmp eq i32 [[T4]], 0
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[T3]], [[T5]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[T0]], [[T1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[K]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = shl i32 1, %c1
   %t1 = shl i32 1, %c2
@@ -340,13 +340,13 @@ define i1 @foo1_and_extra_use_cmp2(i32 %k, i32 %c1, i32 %c2, i1* %p) {
 ; CHECK-LABEL: @foo1_and_extra_use_cmp2(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i32 1, [[C1:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = shl i32 1, [[C2:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = and i32 [[T0]], [[K:%.*]]
-; CHECK-NEXT:    [[T3:%.*]] = icmp eq i32 [[T2]], 0
-; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[K]]
+; CHECK-NEXT:    [[T4:%.*]] = and i32 [[T1]], [[K:%.*]]
 ; CHECK-NEXT:    [[T5:%.*]] = icmp eq i32 [[T4]], 0
 ; CHECK-NEXT:    store i1 [[T5]], i1* [[P:%.*]], align 1
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[T3]], [[T5]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[T0]], [[T1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[K]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[TMP3]]
 ;
   %t0 = shl i32 1, %c1
   %t1 = shl i32 1, %c2
@@ -410,10 +410,11 @@ define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp1(i32 %k,
 ; CHECK-NEXT:    [[T1:%.*]] = and i32 [[T0]], [[K:%.*]]
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[T1]], 0
 ; CHECK-NEXT:    store i1 [[T2]], i1* [[P:%.*]], align 1
-; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = icmp sgt i32 [[T3]], -1
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 -2147483648, [[C2:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[K]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %t0 = shl i32 1, %c1
   %t1 = and i32 %t0, %k

 define i1 @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp2(i32 %k, i32 %c1, i32 %c2, i1* %p) {
 ; CHECK-LABEL: @foo1_and_signbit_lshr_without_shifting_signbit_extra_use_cmp2(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i32 1, [[C1:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = and i32 [[T0]], [[K:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq i32 [[T1]], 0
-; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[K]], [[C2:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = shl i32 [[K:%.*]], [[C2:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = icmp sgt i32 [[T3]], -1
 ; CHECK-NEXT:    store i1 [[T4]], i1* [[P:%.*]], align 1
-; CHECK-NEXT:    [[OR:%.*]] = or i1 [[T2]], [[T4]]
-; CHECK-NEXT:    ret i1 [[OR]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 -2147483648, [[C2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[K]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
 ;
   %t0 = shl i32 1, %c1
   %t1 = and i32 %t0, %k

```




Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64275/new/

https://reviews.llvm.org/D64275




