[llvm] r310770 - [x86] add tests for rotate left/right with masked shifter; NFC

Sanjay Patel via llvm-commits <llvm-commits at lists.llvm.org>
Sat Aug 12 06:44:40 PDT 2017


Thanks for looking closer - I wasn't even thinking about the flags
difference.
Patch proposal coming up...

On Fri, Aug 11, 2017 at 6:32 PM, Craig Topper <craig.topper at gmail.com>
wrote:

> I think we can do this based on what I believe is happening. Based on my
> reading, the hardware will mask the count to 5 bits and then rotate that
> many bits modulo the data size. This works fine for the data result of
> rol/ror. I think the flags will be updated for a count of 16, whereas for 0
> they would not be touched, and I believe 17 will not have the same flag
> behavior as 1, which is treated specially by the hardware. But I don't
> think we ever assume anything about the flags from a variable rotate, so
> that shouldn't matter.
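
(For reference, a rough C model of the data behavior described above. This is
my own sketch based on the thread and the documented count masking, not code
from LLVM or the Intel SDM; the helper name model_rolb is made up, and flags
are deliberately ignored since we never rely on them for a variable rotate.)

#include <stdint.h>
#include <stdio.h>

/* Illustrative model of 8-bit ROL: the hardware masks the count to 5 bits,
   and the data result is then a rotation by that count modulo the width. */
static uint8_t model_rolb(uint8_t x, uint8_t count) {
    unsigned masked = count & 31;   /* hardware masks the rotate count to 5 bits */
    unsigned eff = masked % 8;      /* data result: rotation modulo the operand width */
    if (eff == 0)
        return x;                   /* counts 0, 8, 16, 24 leave the data unchanged */
    return (uint8_t)((x << eff) | (x >> (8 - eff)));
}

int main(void) {
    /* Rotating by 9 or 17 gives the same data result as rotating by 1. */
    printf("%#x %#x %#x\n", (unsigned)model_rolb(0x81, 1),
           (unsigned)model_rolb(0x81, 9), (unsigned)model_rolb(0x81, 17));
    return 0;
}
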
>
> I think if we were talking about RCR/RCL it would not work, because those
> rotate by the 5-bit masked count modulo (data size + 1). But we don't
> produce RCR/RCL, so that's a non-issue. They are lumped together in the
> same section of Intel's documentation, though, so I felt I should mention it.
>
> ~Craig
>
> On Fri, Aug 11, 2017 at 3:52 PM, Sanjay Patel <spatel at rotateright.com>
> wrote:
>
>> Yes, you're right. I even had the code commented out in that link. :)
>> I did test the output on Haswell with oversized rotate amounts, and it
>> behaves like we would hope - rotating around again and/or masking off the
>> high bit(s).
>> Is that enough to adjust the td patterns for these cases?
>>
>> On Fri, Aug 11, 2017 at 4:45 PM, Craig Topper <craig.topper at gmail.com>
>> wrote:
>>
>>> That's not really an existence proof. Isn't that just demonstrating that
>>> if you leave the mask out of the source code, gcc assumes the shift has
>>> to be in bounds to avoid UB? If you put an explicit mask in the source
>>> code, gcc will remove it from 32- and 64-bit rotates, but not from 16-bit
>>> rotates.
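
(To make that concrete, this is roughly the shape of the source being
discussed. It is my reconstruction, not the exact code behind the godbolt
link quoted below, and the comments only restate what the thread reports
about gcc's behavior.)

#include <stdint.h>

/* Rotate-left with an explicit mask on both shift amounts so no shift can be
   out of range. Per the thread, gcc drops the redundant mask for the 32-bit
   (and 64-bit) version but keeps it for the 16-bit version. */
uint32_t rotl32(uint32_t x, unsigned amount) {
    unsigned maskamt = amount & 31;
    unsigned masksub = (0u - amount) & 31;
    return (x << maskamt) | (x >> masksub);
}

uint16_t rotl16(uint16_t x, unsigned amount) {
    unsigned maskamt = amount & 15;
    unsigned masksub = (0u - amount) & 15;
    return (uint16_t)((x << maskamt) | (x >> masksub));
}
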
>>>
>>>
>>>
>>> ~Craig
>>>
>>> On Fri, Aug 11, 2017 at 3:38 PM, Sanjay Patel via llvm-commits <
>>> llvm-commits at lists.llvm.org> wrote:
>>>
>>>> Author: spatel
>>>> Date: Fri Aug 11 15:38:40 2017
>>>> New Revision: 310770
>>>>
>>>> URL: http://llvm.org/viewvc/llvm-project?rev=310770&view=rev
>>>> Log:
>>>> [x86] add tests for rotate left/right with masked shifter; NFC
>>>>
>>>> As noted in the test comment, instcombine now produces the masked
>>>> shift value even when it's not included in the source, so we should
>>>> handle this.
>>>>
>>>> Although the AMD/Intel docs don't say it explicitly, over-rotating
>>>> the narrow ops produces the same results. An existence proof that
>>>> this works as expected on all x86 comes from gcc 4.9 or later:
>>>> https://godbolt.org/g/K6rc1A
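
(An approximate C analogue of the rotate_left_16 test added below, for
reference; this is my sketch and not necessarily the exact source behind the
godbolt link. The masks on both shift amounts are the part the backend should
be able to fold away, since rolw already gives the same data result for
over-sized counts.)

#include <stdint.h>

/* Mirrors the rotate_left_16 IR test: both shift amounts carry an explicit
   mask, which ideally folds into a plain rolw with no extra andb. */
uint16_t rotate_left_16(uint16_t x, uint32_t amount) {
    uint16_t amt = (uint16_t)amount;
    uint16_t maskamt = amt & 15;
    uint16_t masksub = (uint16_t)(0 - amt) & 15;
    return (uint16_t)((x << maskamt) | (x >> masksub));
}
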
>>>>
>>>> Modified:
>>>>     llvm/trunk/test/CodeGen/X86/rotate4.ll
>>>>
>>>> Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=310770&r1=310769&r2=310770&view=diff
>>>> ==============================================================================
>>>> --- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
>>>> +++ llvm/trunk/test/CodeGen/X86/rotate4.ll Fri Aug 11 15:38:40 2017
>>>> @@ -138,3 +138,154 @@ define void @rotate_right_m64(i64 *%pa,
>>>>    ret void
>>>>  }
>>>>
>>>> +; The next 8 tests include masks of the narrow width shift amounts that should be eliminated.
>>>> +; These patterns are produced by instcombine after r310509.
>>>> +
>>>> +define i8 @rotate_left_8(i8 %x, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_left_8:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $7, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rolb %cl, %dil
>>>> +; CHECK-NEXT:    movl %edi, %eax
>>>> +; CHECK-NEXT:    retq
>>>> +  %amt = trunc i32 %amount to i8
>>>> +  %sub = sub i8 0, %amt
>>>> +  %maskamt = and i8 %amt, 7
>>>> +  %masksub = and i8 %sub, 7
>>>> +  %shl = shl i8 %x, %maskamt
>>>> +  %shr = lshr i8 %x, %masksub
>>>> +  %or = or i8 %shl, %shr
>>>> +  ret i8 %or
>>>> +}
>>>> +
>>>> +define i8 @rotate_right_8(i8 %x, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_right_8:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $7, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rorb %cl, %dil
>>>> +; CHECK-NEXT:    movl %edi, %eax
>>>> +; CHECK-NEXT:    retq
>>>> +  %amt = trunc i32 %amount to i8
>>>> +  %sub = sub i8 0, %amt
>>>> +  %maskamt = and i8 %amt, 7
>>>> +  %masksub = and i8 %sub, 7
>>>> +  %shr = lshr i8 %x, %maskamt
>>>> +  %shl = shl i8 %x, %masksub
>>>> +  %or = or i8 %shr, %shl
>>>> +  ret i8 %or
>>>> +}
>>>> +
>>>> +define i16 @rotate_left_16(i16 %x, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_left_16:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $15, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rolw %cl, %di
>>>> +; CHECK-NEXT:    movl %edi, %eax
>>>> +; CHECK-NEXT:    retq
>>>> +  %amt = trunc i32 %amount to i16
>>>> +  %sub = sub i16 0, %amt
>>>> +  %maskamt = and i16 %amt, 15
>>>> +  %masksub = and i16 %sub, 15
>>>> +  %shl = shl i16 %x, %maskamt
>>>> +  %shr = lshr i16 %x, %masksub
>>>> +  %or = or i16 %shl, %shr
>>>> +  ret i16 %or
>>>> +}
>>>> +
>>>> +define i16 @rotate_right_16(i16 %x, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_right_16:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $15, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rorw %cl, %di
>>>> +; CHECK-NEXT:    movl %edi, %eax
>>>> +; CHECK-NEXT:    retq
>>>> +  %amt = trunc i32 %amount to i16
>>>> +  %sub = sub i16 0, %amt
>>>> +  %maskamt = and i16 %amt, 15
>>>> +  %masksub = and i16 %sub, 15
>>>> +  %shr = lshr i16 %x, %maskamt
>>>> +  %shl = shl i16 %x, %masksub
>>>> +  %or = or i16 %shr, %shl
>>>> +  ret i16 %or
>>>> +}
>>>> +
>>>> +define void @rotate_left_m8(i8* %p, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_left_m8:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $7, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rolb %cl, (%rdi)
>>>> +; CHECK-NEXT:    retq
>>>> +  %x = load i8, i8* %p, align 1
>>>> +  %amt = trunc i32 %amount to i8
>>>> +  %sub = sub i8 0, %amt
>>>> +  %maskamt = and i8 %amt, 7
>>>> +  %masksub = and i8 %sub, 7
>>>> +  %shl = shl i8 %x, %maskamt
>>>> +  %shr = lshr i8 %x, %masksub
>>>> +  %or = or i8 %shl, %shr
>>>> +  store i8 %or, i8* %p, align 1
>>>> +  ret void
>>>> +}
>>>> +
>>>> +define void @rotate_right_m8(i8* %p, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_right_m8:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $7, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rorb %cl, (%rdi)
>>>> +; CHECK-NEXT:    retq
>>>> +  %x = load i8, i8* %p, align 1
>>>> +  %amt = trunc i32 %amount to i8
>>>> +  %sub = sub i8 0, %amt
>>>> +  %maskamt = and i8 %amt, 7
>>>> +  %masksub = and i8 %sub, 7
>>>> +  %shl = shl i8 %x, %masksub
>>>> +  %shr = lshr i8 %x, %maskamt
>>>> +  %or = or i8 %shl, %shr
>>>> +  store i8 %or, i8* %p, align 1
>>>> +  ret void
>>>> +}
>>>> +
>>>> +define void @rotate_left_m16(i16* %p, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_left_m16:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $15, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rolw %cl, (%rdi)
>>>> +; CHECK-NEXT:    retq
>>>> +  %x = load i16, i16* %p, align 1
>>>> +  %amt = trunc i32 %amount to i16
>>>> +  %sub = sub i16 0, %amt
>>>> +  %maskamt = and i16 %amt, 15
>>>> +  %masksub = and i16 %sub, 15
>>>> +  %shl = shl i16 %x, %maskamt
>>>> +  %shr = lshr i16 %x, %masksub
>>>> +  %or = or i16 %shl, %shr
>>>> +  store i16 %or, i16* %p, align 1
>>>> +  ret void
>>>> +}
>>>> +
>>>> +define void @rotate_right_m16(i16* %p, i32 %amount) {
>>>> +; CHECK-LABEL: rotate_right_m16:
>>>> +; CHECK:       # BB#0:
>>>> +; CHECK-NEXT:    andb $15, %sil
>>>> +; CHECK-NEXT:    movl %esi, %ecx
>>>> +; CHECK-NEXT:    rorw %cl, (%rdi)
>>>> +; CHECK-NEXT:    retq
>>>> +  %x = load i16, i16* %p, align 1
>>>> +  %amt = trunc i32 %amount to i16
>>>> +  %sub = sub i16 0, %amt
>>>> +  %maskamt = and i16 %amt, 15
>>>> +  %masksub = and i16 %sub, 15
>>>> +  %shl = shl i16 %x, %masksub
>>>> +  %shr = lshr i16 %x, %maskamt
>>>> +  %or = or i16 %shl, %shr
>>>> +  store i16 %or, i16* %p, align 1
>>>> +  ret void
>>>> +}
>>>> +
>>>>
>>>>
>>>> _______________________________________________
>>>> llvm-commits mailing list
>>>> llvm-commits at lists.llvm.org
>>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>>>
>>>
>>>
>>
>