[PATCH] PR19958 wrong code at -O1 and above on x86_64-linux-gnu (InstCombine)

Fri Jul 4 04:16:21 PDT 2014

+ llvm-commit (Sorry, i forgot to add while replying earlier)

On Fri, Jul 4, 2014 at 4:14 PM, suyog sarda <sardask01 at gmail.com> wrote:

> Hi Duncan,
>
> Thanks a lot for the review. My comments inline :
>
>
> On Thu, Jul 3, 2014 at 8:28 PM, Duncan P. N. Exon Smith <
> dexonsmith at apple.com> wrote:
>
>>
>> > On 2014 Jul 3, at 00:07, Duncan P. N. Exon Smith <dexonsmith at apple.com>
>> wrote:
>> >
>> >> On 2014 Jul 2, at 14:33, Sanjay Patel <spatel at rotateright.com> wrote:
>> >>
>> >> Sorry for the delay. LGTM...but considering that versions of this
>> patch have caused miscompiles twice and I'm pretty new here, I think we
>> should have someone with more instcombine experience give final approval in
>> case I've missed anything.
>> >>
>> >> http://reviews.llvm.org/D4068
>> >>
>> >
>> > Hi Suyog,
>> >
>> > It seems like you might be able to go a little further without much
>> > work.
>>
>> After thinking about it overnight, I think if you reverse the order of
>> the lg and divide, you'll avoid the expensive division.  As a bonus, it
>> looks to me like you can always combine the instructions if you do this
>> carefully.
>>
>> I.e., here's some pseudo-code for `ashr`:
>>
>>     // (icmp eq/ne (ashr exact C2, A), C1)
>>     APInt C1, C2;
>>     if (matchesAShrExact(..., C1, C2)) {
>>       if (!C1) {
>>         if (!C2)
>>           return "i1 1";
>>         return "i1 0";
>>       }
>>
>
> I believe this case is handled earlier in the code (maybe 'instsimplify')
> as observed below.
> e.g:
>
> cat test.ll
> define i1 @main(i32 %a) {
>   %shr = ashr exact i32 0, %a
>   %cmp = icmp eq i32 %shr, 0
>   ret i1 %cmp
> }
>
> Output without my patch:
>  llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 true
> }
>
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = ashr exact i32 -80, %a
>   %cmp = icmp eq i32 %shr, 0
>   ret i1 %cmp
> }
>
> Output without my patch :
>
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 false
> }
>
> Same for lshr.
>
>       if (!C2)
>>         return "i1 0";
>>
>
> This is also handled earlier :
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = ashr exact i32 0, %a
>   %cmp = icmp eq i32 %shr, -21
>   ret i1 %cmp
> }
>
> Output without my patch:
>
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 false
> }
>
> Same for lshr.
>
>
>>       if (C1 == C2)
>>         return "icmp eq A, 0";
>>
>
> This is also handled. Though output seems weird.
>
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = ashr exact i32 -81, %a
>   %cmp = icmp eq i32 %shr, -81
>   ret i1 %cmp
> }
>
> Output without my patch:
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 true
> }
>
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = lshr exact i32 -81, %a
>   %cmp = icmp eq i32 %shr, -81
>   ret i1 %cmp
> }
>
> Output without my patch:
>
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 true
> }
>
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = lshr exact i32 81, %a
>   %cmp = icmp eq i32 %shr, 81
>   ret i1 %cmp
> }
>
> Output without my patch:
>
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 true
> }
>
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = ashr exact i32 81, %a
>   %cmp = icmp eq i32 %shr, 81
>   ret i1 %cmp
> }
>
> Output : No change. Same code as above.
>
>
>
>>       if (C1.isNegative() != C2.isNegative())
>>         return "i1 0";
>>
>
> I believe this case is for one of the constant being positive and one
> being negative.
> This is also handled.
>
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = ashr exact i32 -80, %a
>
>   %cmp = icmp eq i32 %shr, 41
>   ret i1 %cmp
> }
>
> Output without my patch:
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 false
> }
>
> Same for lshr.
>
>
>>       if (C1.isNegative()) {
>>         C1 = -C1;
>>         C2 = -C2;
>>       }
>>
>
> At this point, both the constants are negative.
>
>
>>       if (C1 > C2)
>>         return "i1 0";
>>
>
> This is also handled.
>
> e.g:
>
> define i1 @main(i32 %a) {
>   %shr = ashr exact i32 -40, %a
>   %cmp = icmp eq i32 %shr, -81
>   ret i1 %cmp
> }
>
> Output without my patch:
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 false
> }
>
> e.g:
>
> define i1 @main(i32 %a) {
>
>   %shr = lshr exact i32 40, %a
>   %cmp = icmp eq i32 %shr, 81
>   ret i1 %cmp
> }
>
> Output without my patch:
>
> llvm/llvm/build/bin/opt -S -instcombine test.ll
>
> define i1 @main(i32 %a) {
>   ret i1 false
> }
>
> e.g:
> define i1 @main(i32 %a) {
>   %shr = lshr exact i32 -40, %a
>   %cmp = icmp eq i32 %shr, -81
>   ret i1 %cmp
> }
>
> Output: No change in above code
>
>
>
>>       int LgC1 = C1.logBase2(),
>>           LgC2 = C2.logBase2();
>>       if (LgC1 == LgC2)
>>         return "i1 0";
>>       assert(LgC2 > LgC1);
>>       // This subtraction takes the place of your original division.
>>       int Shift = LgC2 - LgC1;
>>       if (C1.shl(Shift) == C2)
>>         return "icmp eq A, {Shift}";
>>       return "i1 0";
>>    }
>>
>> I think other than the handling of `isNegative()`, the same logic works
>> for `lshr`.
>>
>
> Yes, i agree this is more cost effective way to calculate shift. I will
> update the patch
> to use this instead of division.
>
>
>>
>> I think you can use the same basic logic for non-`exact` right-shifts as
>> well.  I believe you just need to modify the initial 0-related checks
>> and reverse the direction of the shift, effectively masking out the
>> lower bits of C2 in the comparison at the end:
>>
>>     // (icmp eq/ne (ashr C2, A), C1)
>>     APInt C1, C2;
>>     if (matchesAShrNotExact(..., C1, C2)) {
>>       if (!C1) {
>>         // Corner cases for C1 == 0 are different.
>>         if (!C2)
>>           return "i1 1";
>>
>
> This is handled earlier.
>
>
>>         if (C2.isNegative())
>>           return "i1 0";
>>
>
> This is also handled earlier.
>
>
>>         int Shift = C2.logBase2();
>>         return "icmp gt A, {Shift};
>>       }
>>
>
> This is not handled. Will include it in my patch.
>
>
>>       if (!C2)
>>         return "i1 0";
>
>
> This is handled earlier.
>
>
>>       // ... same code as before, until:
>>       // Use lshr here, since we've canonicalized to +ve numbers.
>>       int Shift = LgC2 - LgC1;
>>       if (C1 == C2.lshr(Shift))
>>         return "icmp eq A, {Shift}";
>>       return "i1 0";
>>     }
>>
>
> This i will include in my patch.
>
>
>>
>> Again, I think that aside from negative checking, the same basic logic
>> works for `lshr`.
>>
>
>
>
> So, basically, we need to calculate shift by LgC2-LgC1 instead of Lg(C2/C1)
> and compare accordingly :)
>
> I will soon update the patch.
>
> Thanks!!
>
> --
> With regards,
> Suyog Sarda
>

-- 
With regards,
Suyog Sarda
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140704/43f943ec/attachment.html>