[llvm] r310770 - [x86] add tests for rotate left/right with masked shifter; NFC

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 11 17:32:06 PDT 2017


I think we can do this based on what I believe is happening. Based on my
reading of the docs, the hardware masks the count to 5 bits and then
rotates by that value modulo the data size. This works fine for the data
result of the rol/ror. I think the flags will be updated for a count of
16, whereas for 0 they would not be touched. And I believe 17 will not
have the same flag behavior as 1, which is treated specially by the
hardware. But I don't think we ever assume anything about the flags from a
variable rotate, so that shouldn't matter.
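
To make that concrete, here is a rough C model of what I believe an 8-bit
rol does (a sketch of my reading of the docs, not a definitive statement
of the hardware; rol8_model is just an illustrative name):

  #include <stdint.h>

  /* Sketch: the count is first masked to 5 bits, and the data result is
     then equivalent to rotating by that value modulo the operand size.
     Flags are deliberately not modeled, since we never rely on them for
     a variable rotate. */
  static uint8_t rol8_model(uint8_t x, unsigned count) {
      unsigned amt = (count & 31) % 8;  /* 5-bit mask, then mod data size */
      if (amt == 0)
          return x;  /* counts 0 and 16 give the same data, different flags */
      return (uint8_t)((x << amt) | (x >> (8 - amt)));
  }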

I think if we were talking about RCR/RCL it would not work, because they
rotate by the 5-bit value modulo the data size plus 1. But we don't
produce RCR/RCL, so that's a non-issue. They are lumped together with
ROL/ROR in the same section of Intel's documentation, though, so I felt I
should mention it.
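
For contrast, here is a hypothetical model of 8-bit RCL that shows why the
same argument breaks down there (the carry flag acts as a 9th bit, so the
masked count is reduced modulo 9 rather than 8; rcl8_model is again just
an illustrative name):

  #include <stdint.h>

  /* Sketch only: treat {CF, x} as a 9-bit value and rotate it left.
     The 5-bit-masked count is reduced modulo data size + 1. */
  static uint8_t rcl8_model(uint8_t x, unsigned count, unsigned *cf) {
      unsigned amt = (count & 31) % 9;  /* modulo 8 + 1, not 8 */
      unsigned wide = (*cf << 8) | x;   /* 9-bit rotating value */
      for (unsigned i = 0; i < amt; ++i)
          wide = ((wide << 1) | (wide >> 8)) & 0x1FF;
      *cf = (wide >> 8) & 1;
      return (uint8_t)wide;
  }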

~Craig

On Fri, Aug 11, 2017 at 3:52 PM, Sanjay Patel <spatel at rotateright.com>
wrote:

> Yes, you're right. I even had the code commented out in that link. :)
> I did test the output on Haswell with oversized rotate amounts, and it
> behaves like we would hope - rotating around again and/or masking off the
> high bit(s).
> Is that enough to adjust the td patterns for these cases?
>
> On Fri, Aug 11, 2017 at 4:45 PM, Craig Topper <craig.topper at gmail.com>
> wrote:
>
>> That's not really an existence proof. Isn't that just demonstrating that
>> if you leave the mask out of the source code, gcc assumes the shift has
>> to be in bounds to avoid UB? If you put an explicit mask in the source
>> code, gcc will remove it from 32- and 64-bit rotates, but not from
>> 16-bit rotates.
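>>
>> For illustration, the kind of source pattern I mean looks something like
>> this (a hypothetical example, not code from the commit):
>>
>>   /* Explicit mask in the source: gcc folds the '& 31' away and emits a
>>      plain rol for the 32-bit version, but keeps the '& 15' for 16-bit. */
>>   unsigned rotl32(unsigned x, unsigned n) {
>>       n &= 31;
>>       return (x << n) | (x >> ((32 - n) & 31));
>>   }
>>
>>   unsigned short rotl16(unsigned short x, unsigned n) {
>>       n &= 15;
>>       return (unsigned short)((x << n) | (x >> ((16 - n) & 15)));
>>   }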
>>
>>
>>
>> ~Craig
>>
>> On Fri, Aug 11, 2017 at 3:38 PM, Sanjay Patel via llvm-commits <
>> llvm-commits at lists.llvm.org> wrote:
>>
>>> Author: spatel
>>> Date: Fri Aug 11 15:38:40 2017
>>> New Revision: 310770
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=310770&view=rev
>>> Log:
>>> [x86] add tests for rotate left/right with masked shifter; NFC
>>>
>>> As noted in the test comment, instcombine now produces the masked
>>> shift value even when it's not included in the source, so we should
>>> handle this.
>>>
>>> Although the AMD/Intel docs don't say it explicitly, over-rotating
>>> the narrow ops produces the same results. An existence proof that
>>> this works as expected on all x86 comes from gcc 4.9 or later:
>>> https://godbolt.org/g/K6rc1A
>>>
>>> Modified:
>>>     llvm/trunk/test/CodeGen/X86/rotate4.ll
>>>
>>> Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=310770&r1=310769&r2=310770&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
>>> +++ llvm/trunk/test/CodeGen/X86/rotate4.ll Fri Aug 11 15:38:40 2017
>>> @@ -138,3 +138,154 @@ define void @rotate_right_m64(i64 *%pa,
>>>    ret void
>>>  }
>>>
>>> +; The next 8 tests include masks of the narrow width shift amounts that should be eliminated.
>>> +; These patterns are produced by instcombine after r310509.
>>> +
>>> +define i8 @rotate_left_8(i8 %x, i32 %amount) {
>>> +; CHECK-LABEL: rotate_left_8:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $7, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rolb %cl, %dil
>>> +; CHECK-NEXT:    movl %edi, %eax
>>> +; CHECK-NEXT:    retq
>>> +  %amt = trunc i32 %amount to i8
>>> +  %sub = sub i8 0, %amt
>>> +  %maskamt = and i8 %amt, 7
>>> +  %masksub = and i8 %sub, 7
>>> +  %shl = shl i8 %x, %maskamt
>>> +  %shr = lshr i8 %x, %masksub
>>> +  %or = or i8 %shl, %shr
>>> +  ret i8 %or
>>> +}
>>> +
>>> +define i8 @rotate_right_8(i8 %x, i32 %amount) {
>>> +; CHECK-LABEL: rotate_right_8:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $7, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rorb %cl, %dil
>>> +; CHECK-NEXT:    movl %edi, %eax
>>> +; CHECK-NEXT:    retq
>>> +  %amt = trunc i32 %amount to i8
>>> +  %sub = sub i8 0, %amt
>>> +  %maskamt = and i8 %amt, 7
>>> +  %masksub = and i8 %sub, 7
>>> +  %shr = lshr i8 %x, %maskamt
>>> +  %shl = shl i8 %x, %masksub
>>> +  %or = or i8 %shr, %shl
>>> +  ret i8 %or
>>> +}
>>> +
>>> +define i16 @rotate_left_16(i16 %x, i32 %amount) {
>>> +; CHECK-LABEL: rotate_left_16:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $15, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rolw %cl, %di
>>> +; CHECK-NEXT:    movl %edi, %eax
>>> +; CHECK-NEXT:    retq
>>> +  %amt = trunc i32 %amount to i16
>>> +  %sub = sub i16 0, %amt
>>> +  %maskamt = and i16 %amt, 15
>>> +  %masksub = and i16 %sub, 15
>>> +  %shl = shl i16 %x, %maskamt
>>> +  %shr = lshr i16 %x, %masksub
>>> +  %or = or i16 %shl, %shr
>>> +  ret i16 %or
>>> +}
>>> +
>>> +define i16 @rotate_right_16(i16 %x, i32 %amount) {
>>> +; CHECK-LABEL: rotate_right_16:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $15, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rorw %cl, %di
>>> +; CHECK-NEXT:    movl %edi, %eax
>>> +; CHECK-NEXT:    retq
>>> +  %amt = trunc i32 %amount to i16
>>> +  %sub = sub i16 0, %amt
>>> +  %maskamt = and i16 %amt, 15
>>> +  %masksub = and i16 %sub, 15
>>> +  %shr = lshr i16 %x, %maskamt
>>> +  %shl = shl i16 %x, %masksub
>>> +  %or = or i16 %shr, %shl
>>> +  ret i16 %or
>>> +}
>>> +
>>> +define void @rotate_left_m8(i8* %p, i32 %amount) {
>>> +; CHECK-LABEL: rotate_left_m8:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $7, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rolb %cl, (%rdi)
>>> +; CHECK-NEXT:    retq
>>> +  %x = load i8, i8* %p, align 1
>>> +  %amt = trunc i32 %amount to i8
>>> +  %sub = sub i8 0, %amt
>>> +  %maskamt = and i8 %amt, 7
>>> +  %masksub = and i8 %sub, 7
>>> +  %shl = shl i8 %x, %maskamt
>>> +  %shr = lshr i8 %x, %masksub
>>> +  %or = or i8 %shl, %shr
>>> +  store i8 %or, i8* %p, align 1
>>> +  ret void
>>> +}
>>> +
>>> +define void @rotate_right_m8(i8* %p, i32 %amount) {
>>> +; CHECK-LABEL: rotate_right_m8:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $7, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rorb %cl, (%rdi)
>>> +; CHECK-NEXT:    retq
>>> +  %x = load i8, i8* %p, align 1
>>> +  %amt = trunc i32 %amount to i8
>>> +  %sub = sub i8 0, %amt
>>> +  %maskamt = and i8 %amt, 7
>>> +  %masksub = and i8 %sub, 7
>>> +  %shl = shl i8 %x, %masksub
>>> +  %shr = lshr i8 %x, %maskamt
>>> +  %or = or i8 %shl, %shr
>>> +  store i8 %or, i8* %p, align 1
>>> +  ret void
>>> +}
>>> +
>>> +define void @rotate_left_m16(i16* %p, i32 %amount) {
>>> +; CHECK-LABEL: rotate_left_m16:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $15, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rolw %cl, (%rdi)
>>> +; CHECK-NEXT:    retq
>>> +  %x = load i16, i16* %p, align 1
>>> +  %amt = trunc i32 %amount to i16
>>> +  %sub = sub i16 0, %amt
>>> +  %maskamt = and i16 %amt, 15
>>> +  %masksub = and i16 %sub, 15
>>> +  %shl = shl i16 %x, %maskamt
>>> +  %shr = lshr i16 %x, %masksub
>>> +  %or = or i16 %shl, %shr
>>> +  store i16 %or, i16* %p, align 1
>>> +  ret void
>>> +}
>>> +
>>> +define void @rotate_right_m16(i16* %p, i32 %amount) {
>>> +; CHECK-LABEL: rotate_right_m16:
>>> +; CHECK:       # BB#0:
>>> +; CHECK-NEXT:    andb $15, %sil
>>> +; CHECK-NEXT:    movl %esi, %ecx
>>> +; CHECK-NEXT:    rorw %cl, (%rdi)
>>> +; CHECK-NEXT:    retq
>>> +  %x = load i16, i16* %p, align 1
>>> +  %amt = trunc i32 %amount to i16
>>> +  %sub = sub i16 0, %amt
>>> +  %maskamt = and i16 %amt, 15
>>> +  %masksub = and i16 %sub, 15
>>> +  %shl = shl i16 %x, %masksub
>>> +  %shr = lshr i16 %x, %maskamt
>>> +  %or = or i16 %shl, %shr
>>> +  store i16 %or, i16* %p, align 1
>>> +  ret void
>>> +}
>>> +
>>>
>>>
>>>
>>
>>
>