[llvm] r310849 - [x86] fold the mask op on 8- and 16-bit rotates

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 14 08:55:43 PDT 2017


Author: spatel
Date: Mon Aug 14 08:55:43 2017
New Revision: 310849

URL: http://llvm.org/viewvc/llvm-project?rev=310849&view=rev
Log:
[x86] fold the mask op on 8- and 16-bit rotates

Ref the post-commit thread for r310770:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20170807/478507.html

The motivating cases, expressed as 'C' source examples, can look like this:

unsigned char rotate_right_8(unsigned char v, int shift) {
  // shift &= 7;
  v = ( v >> shift ) | ( v << ( 8 - shift ) );
  return v;
}

https://godbolt.org/g/K6rc1A

Notice that the source doesn't contain UB-safe masked shift amounts, but instcombine created those 
in order to produce narrow rotate patterns. This should be the last step needed to resolve PR34046:
https://bugs.llvm.org/show_bug.cgi?id=34046

Differential Revision: https://reviews.llvm.org/D36644

Modified:
    llvm/trunk/lib/Target/X86/X86InstrCompiler.td
    llvm/trunk/test/CodeGen/X86/rotate4.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=310849&r1=310848&r2=310849&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Mon Aug 14 08:55:43 2017
@@ -1627,7 +1627,13 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 
-// Helper imms that check if a mask doesn't change significant shift bits.
+// Helper imms to check if a mask doesn't change significant shift/rotate bits.
+def immShift8 : ImmLeaf<i8, [{
+  return countTrailingOnes<uint64_t>(Imm) >= 3;
+}]>;
+def immShift16 : ImmLeaf<i8, [{
+  return countTrailingOnes<uint64_t>(Imm) >= 4;
+}]>;
 def immShift32 : ImmLeaf<i8, [{
   return countTrailingOnes<uint64_t>(Imm) >= 5;
 }]>;
@@ -1661,8 +1667,38 @@ multiclass MaskedShiftAmountPats<SDNode
 defm : MaskedShiftAmountPats<shl, "SHL">;
 defm : MaskedShiftAmountPats<srl, "SHR">;
 defm : MaskedShiftAmountPats<sra, "SAR">;
-defm : MaskedShiftAmountPats<rotl, "ROL">;
-defm : MaskedShiftAmountPats<rotr, "ROR">;
+
+// ROL/ROR instructions allow a stronger mask optimization than shift for 8- and
+// 16-bit. We can remove a mask of any (bitwidth - 1) on the rotation amount
+// because over-rotating produces the same result. This is noted in the Intel
+// docs with: "tempCOUNT <- (COUNT & COUNTMASK) MOD SIZE". Masking the rotation
+// amount could affect EFLAGS results, but that does not matter because we are
+// not tracking flags for these nodes.
+multiclass MaskedRotateAmountPats<SDNode frag, string name> {
+  // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
+  def : Pat<(frag GR8:$src1, (and CL, immShift8)),
+  (!cast<Instruction>(name # "8rCL") GR8:$src1)>;
+  def : Pat<(frag GR16:$src1, (and CL, immShift16)),
+  (!cast<Instruction>(name # "16rCL") GR16:$src1)>;
+  def : Pat<(frag GR32:$src1, (and CL, immShift32)),
+  (!cast<Instruction>(name # "32rCL") GR32:$src1)>;
+  def : Pat<(store (frag (loadi8 addr:$dst), (and CL, immShift8)), addr:$dst),
+  (!cast<Instruction>(name # "8mCL") addr:$dst)>;
+  def : Pat<(store (frag (loadi16 addr:$dst), (and CL, immShift16)), addr:$dst),
+  (!cast<Instruction>(name # "16mCL") addr:$dst)>;
+  def : Pat<(store (frag (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
+  (!cast<Instruction>(name # "32mCL") addr:$dst)>;
+
+  // (rot x (and y, 63)) ==> (rot x, y)
+  def : Pat<(frag GR64:$src1, (and CL, immShift64)),
+  (!cast<Instruction>(name # "64rCL") GR64:$src1)>;
+  def : Pat<(store (frag (loadi64 addr:$dst), (and CL, immShift64)), addr:$dst),
+  (!cast<Instruction>(name # "64mCL") addr:$dst)>;
+}
+
+
+defm : MaskedRotateAmountPats<rotl, "ROL">;
+defm : MaskedRotateAmountPats<rotr, "ROR">;
 
 // Double shift amount is implicitly masked.
 multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {

Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=310849&r1=310848&r2=310849&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate4.ll Mon Aug 14 08:55:43 2017
@@ -144,7 +144,6 @@ define void @rotate_right_m64(i64 *%pa,
 define i8 @rotate_left_8(i8 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_left_8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
@@ -162,7 +161,6 @@ define i8 @rotate_left_8(i8 %x, i32 %amo
 define i8 @rotate_right_8(i8 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_right_8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorb %cl, %dil
 ; CHECK-NEXT:    movl %edi, %eax
@@ -180,7 +178,6 @@ define i8 @rotate_right_8(i8 %x, i32 %am
 define i16 @rotate_left_16(i16 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_left_16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolw %cl, %di
 ; CHECK-NEXT:    movl %edi, %eax
@@ -198,7 +195,6 @@ define i16 @rotate_left_16(i16 %x, i32 %
 define i16 @rotate_right_16(i16 %x, i32 %amount) {
 ; CHECK-LABEL: rotate_right_16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorw %cl, %di
 ; CHECK-NEXT:    movl %edi, %eax
@@ -216,7 +212,6 @@ define i16 @rotate_right_16(i16 %x, i32
 define void @rotate_left_m8(i8* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_left_m8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolb %cl, (%rdi)
 ; CHECK-NEXT:    retq
@@ -235,7 +230,6 @@ define void @rotate_left_m8(i8* %p, i32
 define void @rotate_right_m8(i8* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_right_m8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $7, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorb %cl, (%rdi)
 ; CHECK-NEXT:    retq
@@ -254,7 +248,6 @@ define void @rotate_right_m8(i8* %p, i32
 define void @rotate_left_m16(i16* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_left_m16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rolw %cl, (%rdi)
 ; CHECK-NEXT:    retq
@@ -273,7 +266,6 @@ define void @rotate_left_m16(i16* %p, i3
 define void @rotate_right_m16(i16* %p, i32 %amount) {
 ; CHECK-LABEL: rotate_right_m16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andb $15, %sil
 ; CHECK-NEXT:    movl %esi, %ecx
 ; CHECK-NEXT:    rorw %cl, (%rdi)
 ; CHECK-NEXT:    retq




More information about the llvm-commits mailing list