[llvm] r310770 - [x86] add tests for rotate left/right with masked shifter; NFC
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 11 15:52:16 PDT 2017
Yes, you're right. I even had the code commented out in that link. :)

I did test the output on Haswell with oversized rotate amounts, and it behaves as we would hope: rotating around again and/or masking off the high bit(s).

Is that enough to adjust the td patterns for these cases?
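
For the arithmetic side of that claim, here is a minimal standalone C sketch (the helper names are mine; this is not code from the commit or from gcc's output). It compares a rotate done one bit at a time, so oversized counts literally wrap around, against the masked-shift idiom that instcombine now emits and that the new IR tests use; the two agree for every 8-bit value and for counts well past the width. Whether rolb/rorb/rolw/rorw honor the same modular behavior for oversized counts in CL is the part the Haswell experiment above checked.

/* Hypothetical helpers, not from the commit: rotl8_ref rotates one bit at a
   time ("rotating around again"); rotl8_masked uses the masked-shift idiom
   from the new IR tests, i.e. the count is reduced to n & 7. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Reference: rotate left one bit at a time, n times. */
static uint8_t rotl8_ref(uint8_t x, unsigned n) {
    while (n--)
        x = (uint8_t)((x << 1) | (x >> 7));
    return x;
}

/* Masked-shift idiom: shift amounts are masked to the narrow width. */
static uint8_t rotl8_masked(uint8_t x, unsigned n) {
    unsigned amt = n & 7;          /* masked shift amount */
    unsigned sub = (0u - n) & 7;   /* masked complement, as in the IR tests */
    return (uint8_t)((x << amt) | (x >> sub));
}

int main(void) {
    for (unsigned x = 0; x < 256; ++x)
        for (unsigned n = 0; n < 100; ++n)   /* includes oversized counts */
            assert(rotl8_ref((uint8_t)x, n) == rotl8_masked((uint8_t)x, n));
    puts("rotate by n == rotate by (n & 7) for all tested values");
    return 0;
}

The same argument carries over to the 16-bit case with a mask of 15; the open question in this thread is only about what the hardware does with the wide count, not about the value-level equivalence.
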
On Fri, Aug 11, 2017 at 4:45 PM, Craig Topper <craig.topper at gmail.com> wrote:
> That's not really an existence proof. Isn't that just demonstrating that,
> if you leave the mask out of the source code, gcc assumes the shift has to
> be in bounds to avoid UB? If you put an explicit mask in the source code,
> gcc will remove it from 32- and 64-bit rotates, but not from 16-bit rotates.
>
>
>
> ~Craig
>
> On Fri, Aug 11, 2017 at 3:38 PM, Sanjay Patel via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>
>> Author: spatel
>> Date: Fri Aug 11 15:38:40 2017
>> New Revision: 310770
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=310770&view=rev
>> Log:
>> [x86] add tests for rotate left/right with masked shifter; NFC
>>
>> As noted in the test comment, instcombine now produces the masked
>> shift value even when it's not included in the source, so we should
>> handle this.
>>
>> Although the AMD/Intel docs don't say it explicitly, over-rotating
>> the narrow ops produces the same results. An existence proof that
>> this works as expected on all x86 comes from gcc 4.9 or later:
>> https://godbolt.org/g/K6rc1A
>>
>> Modified:
>> llvm/trunk/test/CodeGen/X86/rotate4.ll
>>
>> Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=310770&r1=310769&r2=310770&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/rotate4.ll Fri Aug 11 15:38:40 2017
>> @@ -138,3 +138,154 @@ define void @rotate_right_m64(i64 *%pa,
>> ret void
>> }
>>
>> +; The next 8 tests include masks of the narrow width shift amounts that should be eliminated.
>> +; These patterns are produced by instcombine after r310509.
>> +
>> +define i8 @rotate_left_8(i8 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_8:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $7, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rolb %cl, %dil
>> +; CHECK-NEXT: movl %edi, %eax
>> +; CHECK-NEXT: retq
>> + %amt = trunc i32 %amount to i8
>> + %sub = sub i8 0, %amt
>> + %maskamt = and i8 %amt, 7
>> + %masksub = and i8 %sub, 7
>> + %shl = shl i8 %x, %maskamt
>> + %shr = lshr i8 %x, %masksub
>> + %or = or i8 %shl, %shr
>> + ret i8 %or
>> +}
>> +
>> +define i8 @rotate_right_8(i8 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_8:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $7, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rorb %cl, %dil
>> +; CHECK-NEXT: movl %edi, %eax
>> +; CHECK-NEXT: retq
>> + %amt = trunc i32 %amount to i8
>> + %sub = sub i8 0, %amt
>> + %maskamt = and i8 %amt, 7
>> + %masksub = and i8 %sub, 7
>> + %shr = lshr i8 %x, %maskamt
>> + %shl = shl i8 %x, %masksub
>> + %or = or i8 %shr, %shl
>> + ret i8 %or
>> +}
>> +
>> +define i16 @rotate_left_16(i16 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_16:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $15, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rolw %cl, %di
>> +; CHECK-NEXT: movl %edi, %eax
>> +; CHECK-NEXT: retq
>> + %amt = trunc i32 %amount to i16
>> + %sub = sub i16 0, %amt
>> + %maskamt = and i16 %amt, 15
>> + %masksub = and i16 %sub, 15
>> + %shl = shl i16 %x, %maskamt
>> + %shr = lshr i16 %x, %masksub
>> + %or = or i16 %shl, %shr
>> + ret i16 %or
>> +}
>> +
>> +define i16 @rotate_right_16(i16 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_16:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $15, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rorw %cl, %di
>> +; CHECK-NEXT: movl %edi, %eax
>> +; CHECK-NEXT: retq
>> + %amt = trunc i32 %amount to i16
>> + %sub = sub i16 0, %amt
>> + %maskamt = and i16 %amt, 15
>> + %masksub = and i16 %sub, 15
>> + %shr = lshr i16 %x, %maskamt
>> + %shl = shl i16 %x, %masksub
>> + %or = or i16 %shr, %shl
>> + ret i16 %or
>> +}
>> +
>> +define void @rotate_left_m8(i8* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_m8:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $7, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rolb %cl, (%rdi)
>> +; CHECK-NEXT: retq
>> + %x = load i8, i8* %p, align 1
>> + %amt = trunc i32 %amount to i8
>> + %sub = sub i8 0, %amt
>> + %maskamt = and i8 %amt, 7
>> + %masksub = and i8 %sub, 7
>> + %shl = shl i8 %x, %maskamt
>> + %shr = lshr i8 %x, %masksub
>> + %or = or i8 %shl, %shr
>> + store i8 %or, i8* %p, align 1
>> + ret void
>> +}
>> +
>> +define void @rotate_right_m8(i8* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_m8:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $7, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rorb %cl, (%rdi)
>> +; CHECK-NEXT: retq
>> + %x = load i8, i8* %p, align 1
>> + %amt = trunc i32 %amount to i8
>> + %sub = sub i8 0, %amt
>> + %maskamt = and i8 %amt, 7
>> + %masksub = and i8 %sub, 7
>> + %shl = shl i8 %x, %masksub
>> + %shr = lshr i8 %x, %maskamt
>> + %or = or i8 %shl, %shr
>> + store i8 %or, i8* %p, align 1
>> + ret void
>> +}
>> +
>> +define void @rotate_left_m16(i16* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_m16:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $15, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rolw %cl, (%rdi)
>> +; CHECK-NEXT: retq
>> + %x = load i16, i16* %p, align 1
>> + %amt = trunc i32 %amount to i16
>> + %sub = sub i16 0, %amt
>> + %maskamt = and i16 %amt, 15
>> + %masksub = and i16 %sub, 15
>> + %shl = shl i16 %x, %maskamt
>> + %shr = lshr i16 %x, %masksub
>> + %or = or i16 %shl, %shr
>> + store i16 %or, i16* %p, align 1
>> + ret void
>> +}
>> +
>> +define void @rotate_right_m16(i16* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_m16:
>> +; CHECK: # BB#0:
>> +; CHECK-NEXT: andb $15, %sil
>> +; CHECK-NEXT: movl %esi, %ecx
>> +; CHECK-NEXT: rorw %cl, (%rdi)
>> +; CHECK-NEXT: retq
>> + %x = load i16, i16* %p, align 1
>> + %amt = trunc i32 %amount to i16
>> + %sub = sub i16 0, %amt
>> + %maskamt = and i16 %amt, 15
>> + %masksub = and i16 %sub, 15
>> + %shl = shl i16 %x, %masksub
>> + %shr = lshr i16 %x, %maskamt
>> + %or = or i16 %shl, %shr
>> + store i16 %or, i16* %p, align 1
>> + ret void
>> +}
>> +
>>
>>