[llvm] r310770 - [x86] add tests for rotate left/right with masked shifter; NFC

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 11 15:52:16 PDT 2017


Yes, you're right. I even had the code commented out in that link. :)
I did test the output on Haswell with oversized rotate amounts, and it
behaves as we would hope: it rotates around again and/or masks off the
high bit(s).
Is that enough to adjust the td patterns for these cases?
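
For reference, here is a small standalone C check of that property (a hedged
sketch, not part of the commit; the helper names are made up): an N-bit rotate
is periodic in the rotate amount with period N, so an oversized amount behaves
exactly like the amount masked with N-1, which is the masked form instcombine
now emits and the new tests below cover.

/* Hedged sketch (not from the commit): check that a 16-bit rotate is
   periodic in the rotate amount, so over-rotating by amt >= 16 matches
   rotating by (amt & 15), i.e. the masked form instcombine produces. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Rotate left by exactly one bit position. */
static uint16_t rotl16_by1(uint16_t x) {
  return (uint16_t)((x << 1) | (x >> 15));
}

/* UB-free rotate left by a masked amount (both shifts stay in [0, 15]). */
static uint16_t rotl16_masked(uint16_t x, unsigned amt) {
  return (uint16_t)((x << (amt & 15)) | (x >> ((0u - amt) & 15)));
}

int main(void) {
  uint16_t x = 0xBEEF;
  for (unsigned amt = 0; amt < 64; ++amt) {
    uint16_t slow = x;
    for (unsigned i = 0; i < amt; ++i)  /* rotate the long way, 1 bit at a time */
      slow = rotl16_by1(slow);
    assert(slow == rotl16_masked(x, amt));
  }
  puts("over-rotation matches the masked 16-bit rotate");
  return 0;
}

The same argument applies to the i8 cases with a mask of 7.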

On Fri, Aug 11, 2017 at 4:45 PM, Craig Topper <craig.topper at gmail.com>
wrote:

> That's not really an existence proof. Isn't that just demonstrating that,
> if you leave the mask out of the source code, gcc assumes the shift has
> to be in bounds to avoid UB? If you put an explicit mask in the source
> code, gcc will remove it from 32- and 64-bit rotates, but not from
> 16-bit rotates.
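
To make the two source forms concrete, here is a hedged C sketch of what is
being compared; these helpers are illustrative only and not from the thread:

#include <stdint.h>

/* No mask in the source: the shift amounts can go out of range
   (undefined behavior for n == 0 or n >= 32), so the compiler may
   assume n is in bounds and emit a bare rotate. */
uint32_t rot32_no_mask(uint32_t x, unsigned n) {
  return (x << n) | (x >> (32 - n));
}

/* Explicit mask in the source: well defined for any n. Per the
   observation above, gcc folds the mask into the rotate for the
   32/64-bit widths but keeps it for the 16-bit version. */
uint16_t rot16_masked(uint16_t x, unsigned n) {
  return (uint16_t)((x << (n & 15)) | (x >> ((0u - n) & 15)));
}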
>
>
>
> ~Craig
>
> On Fri, Aug 11, 2017 at 3:38 PM, Sanjay Patel via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: spatel
>> Date: Fri Aug 11 15:38:40 2017
>> New Revision: 310770
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=310770&view=rev
>> Log:
>> [x86] add tests for rotate left/right with masked shifter; NFC
>>
>> As noted in the test comment, instcombine now produces the masked
>> shift value even when it's not included in the source, so we should
>> handle this.
>>
>> Although the AMD/Intel docs don't say it explicitly, over-rotating
>> the narrow ops produces the same results. An existence proof that
>> this works as expected on all x86 comes from gcc 4.9 or later:
>> https://godbolt.org/g/K6rc1A
>>
>> Modified:
>>     llvm/trunk/test/CodeGen/X86/rotate4.ll
>>
>> Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=310770&r1=310769&r2=310770&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/rotate4.ll Fri Aug 11 15:38:40 2017
>> @@ -138,3 +138,154 @@ define void @rotate_right_m64(i64 *%pa,
>>    ret void
>>  }
>>
>> +; The next 8 tests include masks of the narrow width shift amounts that should be eliminated.
>> +; These patterns are produced by instcombine after r310509.
>> +
>> +define i8 @rotate_left_8(i8 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_8:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $7, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rolb %cl, %dil
>> +; CHECK-NEXT:    movl %edi, %eax
>> +; CHECK-NEXT:    retq
>> +  %amt = trunc i32 %amount to i8
>> +  %sub = sub i8 0, %amt
>> +  %maskamt = and i8 %amt, 7
>> +  %masksub = and i8 %sub, 7
>> +  %shl = shl i8 %x, %maskamt
>> +  %shr = lshr i8 %x, %masksub
>> +  %or = or i8 %shl, %shr
>> +  ret i8 %or
>> +}
>> +
>> +define i8 @rotate_right_8(i8 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_8:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $7, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rorb %cl, %dil
>> +; CHECK-NEXT:    movl %edi, %eax
>> +; CHECK-NEXT:    retq
>> +  %amt = trunc i32 %amount to i8
>> +  %sub = sub i8 0, %amt
>> +  %maskamt = and i8 %amt, 7
>> +  %masksub = and i8 %sub, 7
>> +  %shr = lshr i8 %x, %maskamt
>> +  %shl = shl i8 %x, %masksub
>> +  %or = or i8 %shr, %shl
>> +  ret i8 %or
>> +}
>> +
>> +define i16 @rotate_left_16(i16 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_16:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $15, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rolw %cl, %di
>> +; CHECK-NEXT:    movl %edi, %eax
>> +; CHECK-NEXT:    retq
>> +  %amt = trunc i32 %amount to i16
>> +  %sub = sub i16 0, %amt
>> +  %maskamt = and i16 %amt, 15
>> +  %masksub = and i16 %sub, 15
>> +  %shl = shl i16 %x, %maskamt
>> +  %shr = lshr i16 %x, %masksub
>> +  %or = or i16 %shl, %shr
>> +  ret i16 %or
>> +}
>> +
>> +define i16 @rotate_right_16(i16 %x, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_16:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $15, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rorw %cl, %di
>> +; CHECK-NEXT:    movl %edi, %eax
>> +; CHECK-NEXT:    retq
>> +  %amt = trunc i32 %amount to i16
>> +  %sub = sub i16 0, %amt
>> +  %maskamt = and i16 %amt, 15
>> +  %masksub = and i16 %sub, 15
>> +  %shr = lshr i16 %x, %maskamt
>> +  %shl = shl i16 %x, %masksub
>> +  %or = or i16 %shr, %shl
>> +  ret i16 %or
>> +}
>> +
>> +define void @rotate_left_m8(i8* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_m8:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $7, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rolb %cl, (%rdi)
>> +; CHECK-NEXT:    retq
>> +  %x = load i8, i8* %p, align 1
>> +  %amt = trunc i32 %amount to i8
>> +  %sub = sub i8 0, %amt
>> +  %maskamt = and i8 %amt, 7
>> +  %masksub = and i8 %sub, 7
>> +  %shl = shl i8 %x, %maskamt
>> +  %shr = lshr i8 %x, %masksub
>> +  %or = or i8 %shl, %shr
>> +  store i8 %or, i8* %p, align 1
>> +  ret void
>> +}
>> +
>> +define void @rotate_right_m8(i8* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_m8:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $7, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rorb %cl, (%rdi)
>> +; CHECK-NEXT:    retq
>> +  %x = load i8, i8* %p, align 1
>> +  %amt = trunc i32 %amount to i8
>> +  %sub = sub i8 0, %amt
>> +  %maskamt = and i8 %amt, 7
>> +  %masksub = and i8 %sub, 7
>> +  %shl = shl i8 %x, %masksub
>> +  %shr = lshr i8 %x, %maskamt
>> +  %or = or i8 %shl, %shr
>> +  store i8 %or, i8* %p, align 1
>> +  ret void
>> +}
>> +
>> +define void @rotate_left_m16(i16* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_left_m16:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $15, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rolw %cl, (%rdi)
>> +; CHECK-NEXT:    retq
>> +  %x = load i16, i16* %p, align 1
>> +  %amt = trunc i32 %amount to i16
>> +  %sub = sub i16 0, %amt
>> +  %maskamt = and i16 %amt, 15
>> +  %masksub = and i16 %sub, 15
>> +  %shl = shl i16 %x, %maskamt
>> +  %shr = lshr i16 %x, %masksub
>> +  %or = or i16 %shl, %shr
>> +  store i16 %or, i16* %p, align 1
>> +  ret void
>> +}
>> +
>> +define void @rotate_right_m16(i16* %p, i32 %amount) {
>> +; CHECK-LABEL: rotate_right_m16:
>> +; CHECK:       # BB#0:
>> +; CHECK-NEXT:    andb $15, %sil
>> +; CHECK-NEXT:    movl %esi, %ecx
>> +; CHECK-NEXT:    rorw %cl, (%rdi)
>> +; CHECK-NEXT:    retq
>> +  %x = load i16, i16* %p, align 1
>> +  %amt = trunc i32 %amount to i16
>> +  %sub = sub i16 0, %amt
>> +  %maskamt = and i16 %amt, 15
>> +  %masksub = and i16 %sub, 15
>> +  %shl = shl i16 %x, %masksub
>> +  %shr = lshr i16 %x, %maskamt
>> +  %or = or i16 %shl, %shr
>> +  store i16 %or, i16* %p, align 1
>> +  ret void
>> +}
>> +
>>
>>

