[llvm] r340441 - [X86] Replace (32/64 - n) shift amounts with (neg n) since the shift amount is masked in hardware

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 22 12:39:09 PDT 2018


Author: ctopper
Date: Wed Aug 22 12:39:09 2018
New Revision: 340441

URL: http://llvm.org/viewvc/llvm-project?rev=340441&view=rev
Log:
[X86] Replace (32/64 - n) shift amounts with (neg n) since the shift amount is masked in hardware

Inspired by what AArch64 does for shifts, this patch attempts to replace shift amounts with neg if we can.

This is done directly as part of isel so it's as late as possible, to avoid breaking some BZHI patterns, since those patterns need an unmasked (32-n) to be correct.

To avoid manual load folding and custom instruction selection for the negate, I've inserted new nodes in the DAG above the shift node in topological order.

Differential Revision: https://reviews.llvm.org/D48789

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
    llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
    llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Aug 22 12:39:09 2018
@@ -456,6 +456,7 @@ namespace {
     bool matchBEXTRFromAnd(SDNode *Node);
     bool shrinkAndImmediate(SDNode *N);
     bool isMaskZeroExtended(SDNode *N) const;
+    bool tryShiftAmountMod(SDNode *N);
 
     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                 const SDLoc &dl, MVT VT, SDNode *Node);
@@ -2690,6 +2691,102 @@ MachineSDNode *X86DAGToDAGISel::emitPCMP
   return CNode;
 }
 
+bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  // Only handle scalar shifts.
+  if (VT.isVector())
+    return false;
+
+  // Narrower shifts only mask to 5 bits in hardware.
+  unsigned Size = VT == MVT::i64 ? 64 : 32;
+
+  SDValue OrigShiftAmt = N->getOperand(1);
+  SDValue ShiftAmt = OrigShiftAmt;
+  SDLoc DL(N);
+
+  // Skip over a truncate of the shift amount.
+  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
+    ShiftAmt = ShiftAmt->getOperand(0);
+
+  // Special case to avoid messing up a BZHI pattern.
+  // Look for (srl (shl X, (size - y)), (size - y)
+  if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) &&
+      N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL &&
+      // Shift amounts the same?
+      N->getOperand(1) == N->getOperand(0).getOperand(1) &&
+      // Shift amounts size - y?
+      ShiftAmt.getOpcode() == ISD::SUB &&
+      isa<ConstantSDNode>(ShiftAmt.getOperand(0)) &&
+      cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size)
+    return false;
+
+  SDValue NewShiftAmt;
+  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+    SDValue Add0 = ShiftAmt->getOperand(0);
+    SDValue Add1 = ShiftAmt->getOperand(1);
+    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+    // to avoid the ADD/SUB.
+    if (isa<ConstantSDNode>(Add1) &&
+        cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
+      NewShiftAmt = Add0;
+    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+    // generate a NEG instead of a SUB of a constant.
+    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+               isa<ConstantSDNode>(Add0) &&
+               cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
+               cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
+      // Insert a negate op.
+      // TODO: This isn't guaranteed to replace the sub if there is a logic cone
+      // that uses it that's not a shift.
+      EVT SubVT = ShiftAmt.getValueType();
+      SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
+      SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+      NewShiftAmt = Neg;
+
+      // Insert these operands into a valid topological order so they can
+      // get selected independently.
+      insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
+      insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
+    } else
+      return false;
+  } else
+    return false;
+
+  if (NewShiftAmt.getValueType() != MVT::i8) {
+    // Need to truncate the shift amount.
+    NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
+    // Add to a correct topological ordering.
+    insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+  }
+
+  // Insert a new mask to keep the shift amount legal. This should be removed
+  // by isel patterns.
+  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
+                                CurDAG->getConstant(Size - 1, DL, MVT::i8));
+  // Place in a correct topological ordering.
+  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+
+  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+                                                   NewShiftAmt);
+  if (UpdatedNode != N) {
+    // If we found an existing node, we should replace ourselves with that node
+    // and wait for it to be selected after its other users.
+    ReplaceNode(N, UpdatedNode);
+    return true;
+  }
+
+  // If the original shift amount is now dead, delete it so that we don't run
+  // it through isel.
+  if (OrigShiftAmt.getNode()->use_empty())
+    CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
+
+  // Now that we've optimized the shift amount, defer to normal isel to get
+  // load folding and legacy vs BMI2 selection without repeating it here.
+  SelectCode(N);
+  return true;
+}
+
 /// If the high bits of an 'and' operand are known zero, try setting the
 /// high bits of an 'and' constant operand to produce a smaller encoding by
 /// creating a small, sign-extended negative immediate rather than a large
@@ -2820,6 +2917,13 @@ void X86DAGToDAGISel::Select(SDNode *Nod
     return;
   }
 
+  case ISD::SRL:
+  case ISD::SRA:
+  case ISD::SHL:
+    if (tryShiftAmountMod(Node))
+      return;
+    break;
+
   case ISD::AND:
     if (matchBEXTRFromAnd(Node))
       return;

Modified: llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear-lowbits.ll?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear-lowbits.ll Wed Aug 22 12:39:09 2018
@@ -1108,7 +1108,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ; X86-NOBMI2-LABEL: clear_lowbits32_ic0:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT:    movl $32, %ecx
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1117,7 +1117,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ;
 ; X86-BMI2-LABEL: clear_lowbits32_ic0:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movl $32, %eax
+; X86-BMI2-NEXT:    xorl %eax, %eax
 ; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
@@ -1125,8 +1125,8 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic0:
 ; X64-NOBMI2:       # %bb.0:
-; X64-NOBMI2-NEXT:    movl $32, %ecx
-; X64-NOBMI2-NEXT:    subl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %edi
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %edi
@@ -1135,10 +1135,9 @@ define i32 @clear_lowbits32_ic0(i32 %val
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic0:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movl $32, %eax
-; X64-BMI2-NEXT:    subl %esi, %eax
-; X64-BMI2-NEXT:    shrxl %eax, %edi, %ecx
-; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT:    negl %esi
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = shl i32 -1, %numhighbits
@@ -1150,15 +1149,16 @@ define i32 @clear_lowbits32_ic1_indexzex
 ; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT:    movb $32, %cl
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movb $32, %al
+; X86-BMI2-NEXT:    xorl %eax, %eax
 ; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %al
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
@@ -1166,19 +1166,19 @@ define i32 @clear_lowbits32_ic1_indexzex
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
-; X64-NOBMI2-NEXT:    movb $32, %cl
-; X64-NOBMI2-NEXT:    subb %sil, %cl
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    negb %cl
 ; X64-NOBMI2-NEXT:    shrl %cl, %edi
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %edi
 ; X64-NOBMI2-NEXT:    movl %edi, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movb $32, %al
-; X64-BMI2-NEXT:    subb %sil, %al
-; X64-BMI2-NEXT:    shrxl %eax, %edi, %ecx
-; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 32, %numlowbits
   %sh_prom = zext i8 %numhighbits to i32
@@ -1192,7 +1192,7 @@ define i32 @clear_lowbits32_ic2_load(i32
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl (%eax), %eax
-; X86-NOBMI2-NEXT:    movl $32, %ecx
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1202,7 +1202,7 @@ define i32 @clear_lowbits32_ic2_load(i32
 ; X86-BMI2-LABEL: clear_lowbits32_ic2_load:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movl $32, %ecx
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
@@ -1210,9 +1210,9 @@ define i32 @clear_lowbits32_ic2_load(i32
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load:
 ; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movl (%rdi), %eax
-; X64-NOBMI2-NEXT:    movl $32, %ecx
-; X64-NOBMI2-NEXT:    subl %esi, %ecx
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
@@ -1220,10 +1220,9 @@ define i32 @clear_lowbits32_ic2_load(i32
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movl $32, %eax
-; X64-BMI2-NEXT:    subl %esi, %eax
-; X64-BMI2-NEXT:    shrxl %eax, (%rdi), %ecx
-; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT:    negl %esi
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i32 32, %numlowbits
@@ -1237,16 +1236,17 @@ define i32 @clear_lowbits32_ic3_load_ind
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI2-NEXT:    movl (%eax), %eax
-; X86-NOBMI2-NEXT:    movb $32, %cl
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
+; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI2-NEXT:    shll %cl, %eax
 ; X86-NOBMI2-NEXT:    retl
 ;
 ; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
 ; X86-BMI2:       # %bb.0:
 ; X86-BMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    movb $32, %cl
+; X86-BMI2-NEXT:    xorl %ecx, %ecx
 ; X86-BMI2-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-BMI2-NEXT:    shrxl %ecx, (%eax), %eax
 ; X86-BMI2-NEXT:    shlxl %ecx, %eax, %eax
@@ -1254,19 +1254,19 @@ define i32 @clear_lowbits32_ic3_load_ind
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
 ; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movl (%rdi), %eax
-; X64-NOBMI2-NEXT:    movb $32, %cl
-; X64-NOBMI2-NEXT:    subb %sil, %cl
+; X64-NOBMI2-NEXT:    negb %cl
 ; X64-NOBMI2-NEXT:    shrl %cl, %eax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %eax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movb $32, %al
-; X64-BMI2-NEXT:    subb %sil, %al
-; X64-BMI2-NEXT:    shrxl %eax, (%rdi), %ecx
-; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
   %val = load i32, i32* %w
   %numhighbits = sub i8 32, %numlowbits
@@ -1280,7 +1280,7 @@ define i32 @clear_lowbits32_ic4_commutat
 ; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
 ; X86-NOBMI2:       # %bb.0:
 ; X86-NOBMI2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT:    movl $32, %ecx
+; X86-NOBMI2-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI2-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI2-NEXT:    shrl %cl, %eax
 ; X86-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1289,7 +1289,7 @@ define i32 @clear_lowbits32_ic4_commutat
 ;
 ; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
 ; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    movl $32, %eax
+; X86-BMI2-NEXT:    xorl %eax, %eax
 ; X86-BMI2-NEXT:    subl {{[0-9]+}}(%esp), %eax
 ; X86-BMI2-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %ecx
 ; X86-BMI2-NEXT:    shlxl %eax, %ecx, %eax
@@ -1297,8 +1297,8 @@ define i32 @clear_lowbits32_ic4_commutat
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
 ; X64-NOBMI2:       # %bb.0:
-; X64-NOBMI2-NEXT:    movl $32, %ecx
-; X64-NOBMI2-NEXT:    subl %esi, %ecx
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrl %cl, %edi
 ; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shll %cl, %edi
@@ -1307,10 +1307,9 @@ define i32 @clear_lowbits32_ic4_commutat
 ;
 ; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movl $32, %eax
-; X64-BMI2-NEXT:    subl %esi, %eax
-; X64-BMI2-NEXT:    shrxl %eax, %edi, %ecx
-; X64-BMI2-NEXT:    shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT:    negl %esi
+; X64-BMI2-NEXT:    shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT:    shlxl %esi, %eax, %eax
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i32 32, %numlowbits
   %mask = shl i32 -1, %numhighbits
@@ -1358,20 +1357,19 @@ define i64 @clear_lowbits64_ic0(i64 %val
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
 ; X64-NOBMI2:       # %bb.0:
-; X64-NOBMI2-NEXT:    movl $64, %ecx
-; X64-NOBMI2-NEXT:    subl %esi, %ecx
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic0:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movl $64, %eax
-; X64-BMI2-NEXT:    subl %esi, %eax
-; X64-BMI2-NEXT:    shrxq %rax, %rdi, %rcx
-; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT:    negl %esi
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = shl i64 -1, %numhighbits
@@ -1417,19 +1415,20 @@ define i64 @clear_lowbits64_ic1_indexzex
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
 ; X64-NOBMI2:       # %bb.0:
-; X64-NOBMI2-NEXT:    movb $64, %cl
-; X64-NOBMI2-NEXT:    subb %sil, %cl
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
+; X64-NOBMI2-NEXT:    negb %cl
 ; X64-NOBMI2-NEXT:    shrq %cl, %rdi
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movb $64, %al
-; X64-BMI2-NEXT:    subb %sil, %al
-; X64-BMI2-NEXT:    shrxq %rax, %rdi, %rcx
-; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i8 64, %numlowbits
   %sh_prom = zext i8 %numhighbits to i64
@@ -1482,20 +1481,19 @@ define i64 @clear_lowbits64_ic2_load(i64
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
 ; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI2-NEXT:    movq (%rdi), %rax
-; X64-NOBMI2-NEXT:    movl $64, %ecx
-; X64-NOBMI2-NEXT:    subl %esi, %ecx
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrq %cl, %rax
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movl $64, %eax
-; X64-BMI2-NEXT:    subl %esi, %eax
-; X64-BMI2-NEXT:    shrxq %rax, (%rdi), %rcx
-; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT:    negl %esi
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i64 64, %numlowbits
@@ -1548,19 +1546,20 @@ define i64 @clear_lowbits64_ic3_load_ind
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
 ; X64-NOBMI2:       # %bb.0:
+; X64-NOBMI2-NEXT:    movl %esi, %ecx
 ; X64-NOBMI2-NEXT:    movq (%rdi), %rax
-; X64-NOBMI2-NEXT:    movb $64, %cl
-; X64-NOBMI2-NEXT:    subb %sil, %cl
+; X64-NOBMI2-NEXT:    negb %cl
 ; X64-NOBMI2-NEXT:    shrq %cl, %rax
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movb $64, %al
-; X64-BMI2-NEXT:    subb %sil, %al
-; X64-BMI2-NEXT:    shrxq %rax, (%rdi), %rcx
-; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT:    negb %sil
+; X64-BMI2-NEXT:    shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq
   %val = load i64, i64* %w
   %numhighbits = sub i8 64, %numlowbits
@@ -1608,20 +1607,19 @@ define i64 @clear_lowbits64_ic4_commutat
 ;
 ; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
 ; X64-NOBMI2:       # %bb.0:
-; X64-NOBMI2-NEXT:    movl $64, %ecx
-; X64-NOBMI2-NEXT:    subl %esi, %ecx
+; X64-NOBMI2-NEXT:    movq %rsi, %rcx
+; X64-NOBMI2-NEXT:    negl %ecx
 ; X64-NOBMI2-NEXT:    shrq %cl, %rdi
-; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI2-NEXT:    shlq %cl, %rdi
 ; X64-NOBMI2-NEXT:    movq %rdi, %rax
 ; X64-NOBMI2-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movl $64, %eax
-; X64-BMI2-NEXT:    subl %esi, %eax
-; X64-BMI2-NEXT:    shrxq %rax, %rdi, %rcx
-; X64-BMI2-NEXT:    shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT:    negl %esi
+; X64-BMI2-NEXT:    shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT:    shlxq %rsi, %rax, %rax
 ; X64-BMI2-NEXT:    retq
   %numhighbits = sub i64 64, %numlowbits
   %mask = shl i64 -1, %numhighbits

Modified: llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-lowbits.ll?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-lowbits.ll Wed Aug 22 12:39:09 2018
@@ -1016,7 +1016,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ; X86-NOBMI-LABEL: bzhi32_c0:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl $32, %ecx
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1031,8 +1031,8 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
 ;
 ; X64-NOBMI-LABEL: bzhi32_c0:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $32, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %edi
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %edi
@@ -1053,9 +1053,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val
 ; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movb $32, %cl
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -1067,9 +1068,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val
 ;
 ; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movb $32, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shll %cl, %edi
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %edi
 ; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
@@ -1090,7 +1092,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
-; X86-NOBMI-NEXT:    movl $32, %ecx
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1106,9 +1108,9 @@ define i32 @bzhi32_c2_load(i32* %w, i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_c2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl (%rdi), %eax
-; X64-NOBMI-NEXT:    movl $32, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
@@ -1130,9 +1132,10 @@ define i32 @bzhi32_c3_load_indexzext(i32
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
-; X86-NOBMI-NEXT:    movb $32, %cl
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -1145,10 +1148,11 @@ define i32 @bzhi32_c3_load_indexzext(i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl (%rdi), %eax
-; X64-NOBMI-NEXT:    movb $32, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -1168,7 +1172,7 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ; X86-NOBMI-LABEL: bzhi32_c4_commutative:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl $32, %ecx
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1183,8 +1187,8 @@ define i32 @bzhi32_c4_commutative(i32 %v
 ;
 ; X64-NOBMI-LABEL: bzhi32_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $32, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %edi
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %edi
@@ -1241,10 +1245,10 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
 ;
 ; X64-NOBMI-LABEL: bzhi64_c0:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $64, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rdi
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
@@ -1297,9 +1301,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val
 ;
 ; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movb $64, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shlq %cl, %rdi
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
@@ -1360,11 +1365,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_c2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq (%rdi), %rax
-; X64-NOBMI-NEXT:    movl $64, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -1423,10 +1428,11 @@ define i64 @bzhi64_c3_load_indexzext(i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movq (%rdi), %rax
-; X64-NOBMI-NEXT:    movb $64, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -1481,10 +1487,10 @@ define i64 @bzhi64_c4_commutative(i64 %v
 ;
 ; X64-NOBMI-LABEL: bzhi64_c4_commutative:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $64, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rdi
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
@@ -1507,7 +1513,7 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
 ; X86-NOBMI-LABEL: bzhi32_d0:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl $32, %ecx
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1522,8 +1528,8 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
 ;
 ; X64-NOBMI-LABEL: bzhi32_d0:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $32, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %edi
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %edi
@@ -1544,9 +1550,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val
 ; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movb $32, %cl
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -1558,9 +1565,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val
 ;
 ; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movb $32, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shll %cl, %edi
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %edi
 ; X64-NOBMI-NEXT:    movl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
@@ -1581,7 +1589,7 @@ define i32 @bzhi32_d2_load(i32* %w, i32
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
-; X86-NOBMI-NEXT:    movl $32, %ecx
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    shll %cl, %eax
 ; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
@@ -1597,9 +1605,9 @@ define i32 @bzhi32_d2_load(i32* %w, i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_d2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl (%rdi), %eax
-; X64-NOBMI-NEXT:    movl $32, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shll %cl, %eax
 ; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
@@ -1621,9 +1629,10 @@ define i32 @bzhi32_d3_load_indexzext(i32
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    movl (%eax), %eax
-; X86-NOBMI-NEXT:    movb $32, %cl
+; X86-NOBMI-NEXT:    xorl %ecx, %ecx
 ; X86-NOBMI-NEXT:    subb {{[0-9]+}}(%esp), %cl
 ; X86-NOBMI-NEXT:    shll %cl, %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X86-NOBMI-NEXT:    shrl %cl, %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -1636,10 +1645,11 @@ define i32 @bzhi32_d3_load_indexzext(i32
 ;
 ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movl (%rdi), %eax
-; X64-NOBMI-NEXT:    movb $32, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shll %cl, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrl %cl, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -1731,10 +1741,10 @@ define i64 @bzhi64_d0(i64 %val, i64 %num
 ;
 ; X64-NOBMI-LABEL: bzhi64_d0:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movl $64, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rdi
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
@@ -1823,9 +1833,10 @@ define i64 @bzhi64_d1_indexzext(i64 %val
 ;
 ; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movb $64, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    movl %esi, %ecx
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shlq %cl, %rdi
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rdi
 ; X64-NOBMI-NEXT:    movq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
@@ -1918,11 +1929,11 @@ define i64 @bzhi64_d2_load(i64* %w, i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_d2_load:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
 ; X64-NOBMI-NEXT:    movq (%rdi), %rax
-; X64-NOBMI-NEXT:    movl $64, %ecx
-; X64-NOBMI-NEXT:    subl %esi, %ecx
+; X64-NOBMI-NEXT:    negl %ecx
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
-; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -2013,10 +2024,11 @@ define i64 @bzhi64_d3_load_indexzext(i64
 ;
 ; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
 ; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %ecx
 ; X64-NOBMI-NEXT:    movq (%rdi), %rax
-; X64-NOBMI-NEXT:    movb $64, %cl
-; X64-NOBMI-NEXT:    subb %sil, %cl
+; X64-NOBMI-NEXT:    negb %cl
 ; X64-NOBMI-NEXT:    shlq %cl, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NOBMI-NEXT:    shrq %cl, %rax
 ; X64-NOBMI-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll Wed Aug 22 12:39:09 2018
@@ -162,11 +162,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
 ;
 ; BTVER2-LABEL: lshift_cl:
 ; BTVER2:       # %bb.0: # %entry
-; BTVER2-NEXT:    movl %edx, %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    shlq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT:    movl $64, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    subl %edx, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BTVER2-NEXT:    shrq %cl, %rsi # sched: [1:0.50]
 ; BTVER2-NEXT:    orq %rdi, %rsi # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
@@ -174,11 +173,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
 ;
 ; BDVER1-LABEL: lshift_cl:
 ; BDVER1:       # %bb.0: # %entry
-; BDVER1-NEXT:    movl %edx, %ecx
+; BDVER1-NEXT:    movq %rdx, %rcx
 ; BDVER1-NEXT:    shlq %cl, %rdi
-; BDVER1-NEXT:    movl $64, %ecx
-; BDVER1-NEXT:    subl %edx, %ecx
-; BDVER1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; BDVER1-NEXT:    negl %ecx
+; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BDVER1-NEXT:    shrq %cl, %rsi
 ; BDVER1-NEXT:    orq %rdi, %rsi
 ; BDVER1-NEXT:    movq %rsi, %rax
@@ -236,11 +234,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
 ;
 ; BTVER2-LABEL: rshift_cl:
 ; BTVER2:       # %bb.0: # %entry
-; BTVER2-NEXT:    movl %edx, %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    movq %rdx, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT:    movl $64, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    subl %edx, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BTVER2-NEXT:    shlq %cl, %rsi # sched: [1:0.50]
 ; BTVER2-NEXT:    orq %rdi, %rsi # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rsi, %rax # sched: [1:0.50]
@@ -248,11 +245,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
 ;
 ; BDVER1-LABEL: rshift_cl:
 ; BDVER1:       # %bb.0: # %entry
-; BDVER1-NEXT:    movl %edx, %ecx
+; BDVER1-NEXT:    movq %rdx, %rcx
 ; BDVER1-NEXT:    shrq %cl, %rdi
-; BDVER1-NEXT:    movl $64, %ecx
-; BDVER1-NEXT:    subl %edx, %ecx
-; BDVER1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; BDVER1-NEXT:    negl %ecx
+; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BDVER1-NEXT:    shlq %cl, %rsi
 ; BDVER1-NEXT:    orq %rdi, %rsi
 ; BDVER1-NEXT:    movq %rsi, %rax
@@ -310,11 +306,10 @@ define void @lshift_mem_cl(i64 %a, i64 %
 ; BTVER2-LABEL: lshift_mem_cl:
 ; BTVER2:       # %bb.0: # %entry
 ; BTVER2-NEXT:    movq {{.*}}(%rip), %rax # sched: [5:1.00]
-; BTVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    movq %rsi, %rcx # sched: [1:0.50]
 ; BTVER2-NEXT:    shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT:    movl $64, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    subl %esi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; BTVER2-NEXT:    negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BTVER2-NEXT:    shrq %cl, %rdi # sched: [1:0.50]
 ; BTVER2-NEXT:    orq %rax, %rdi # sched: [1:0.50]
 ; BTVER2-NEXT:    movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
@@ -322,12 +317,11 @@ define void @lshift_mem_cl(i64 %a, i64 %
 ;
 ; BDVER1-LABEL: lshift_mem_cl:
 ; BDVER1:       # %bb.0: # %entry
+; BDVER1-NEXT:    movq %rsi, %rcx
 ; BDVER1-NEXT:    movq {{.*}}(%rip), %rax
-; BDVER1-NEXT:    movl %esi, %ecx
 ; BDVER1-NEXT:    shlq %cl, %rax
-; BDVER1-NEXT:    movl $64, %ecx
-; BDVER1-NEXT:    subl %esi, %ecx
-; BDVER1-NEXT:    # kill: def $cl killed $cl killed $ecx
+; BDVER1-NEXT:    negl %ecx
+; BDVER1-NEXT:    # kill: def $cl killed $cl killed $rcx
 ; BDVER1-NEXT:    shrq %cl, %rdi
 ; BDVER1-NEXT:    orq %rax, %rdi
 ; BDVER1-NEXT:    movq %rdi, {{.*}}(%rip)




More information about the llvm-commits mailing list