[llvm] r340441 - [X86] Replace (32/64 - n) shift amounts with (neg n) since the shift amount is masked in hardware
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 22 12:39:09 PDT 2018
Author: ctopper
Date: Wed Aug 22 12:39:09 2018
New Revision: 340441
URL: http://llvm.org/viewvc/llvm-project?rev=340441&view=rev
Log:
[X86] Replace (32/64 - n) shift amounts with (neg n) since the shift amount is masked in hardware
Inspired by what AArch64 does for shifts, this patch attempts to replace shift amounts with neg if we can.
This is done directly as part of isel so it's as late as possible, to avoid breaking some BZHI patterns, since those patterns need an unmasked (32-n) to be correct.
To avoid manual load folding and custom instruction selection for the negate, I've inserted new nodes in the DAG above the shift node in topological order.
Differential Revision: https://reviews.llvm.org/D48789
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Aug 22 12:39:09 2018
@@ -456,6 +456,7 @@ namespace {
bool matchBEXTRFromAnd(SDNode *Node);
bool shrinkAndImmediate(SDNode *N);
bool isMaskZeroExtended(SDNode *N) const;
+ bool tryShiftAmountMod(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
@@ -2690,6 +2691,102 @@ MachineSDNode *X86DAGToDAGISel::emitPCMP
return CNode;
}
+bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ // Only handle scalar shifts.
+ if (VT.isVector())
+ return false;
+
+ // i64 shifts mask the amount mod 64; narrower shifts only mask to 5 bits.
+ unsigned Size = VT == MVT::i64 ? 64 : 32;
+
+ SDValue OrigShiftAmt = N->getOperand(1);
+ SDValue ShiftAmt = OrigShiftAmt;
+ SDLoc DL(N);
+
+ // Skip over a truncate of the shift amount.
+ if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
+ ShiftAmt = ShiftAmt->getOperand(0);
+
+ // Special case to avoid messing up a BZHI pattern.
+ // Look for (srl (shl X, (size - y)), (size - y))
+ if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) &&
+ N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL &&
+ // Shift amounts the same?
+ N->getOperand(1) == N->getOperand(0).getOperand(1) &&
+ // Shift amounts size - y?
+ ShiftAmt.getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(ShiftAmt.getOperand(0)) &&
+ cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size)
+ return false;
+
+ SDValue NewShiftAmt;
+ if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+ SDValue Add0 = ShiftAmt->getOperand(0);
+ SDValue Add1 = ShiftAmt->getOperand(1);
+ // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+ // to avoid the ADD/SUB.
+ if (isa<ConstantSDNode>(Add1) &&
+ cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
+ NewShiftAmt = Add0;
+ // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+ // generate a NEG instead of a SUB of a constant.
+ } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(Add0) &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
+ cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
+ // Insert a negate op.
+ // TODO: This isn't guaranteed to replace the sub if there is a logic cone
+ // that uses it that's not a shift.
+ EVT SubVT = ShiftAmt.getValueType();
+ SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
+ SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+ NewShiftAmt = Neg;
+
+ // Insert these operands into a valid topological order so they can
+ // get selected independently.
+ insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
+ insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
+ } else
+ return false;
+ } else
+ return false;
+
+ if (NewShiftAmt.getValueType() != MVT::i8) {
+ // Need to truncate the shift amount.
+ NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
+ // Add to a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+ }
+
+ // Insert a new mask to keep the shift amount legal. This should be removed
+ // by isel patterns.
+ NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
+ CurDAG->getConstant(Size - 1, DL, MVT::i8));
+ // Place in a correct topological ordering.
+ insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+
+ SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+ NewShiftAmt);
+ if (UpdatedNode != N) {
+ // If we found an existing node, we should replace ourselves with that node
+ // and wait for it to be selected after its other users.
+ ReplaceNode(N, UpdatedNode);
+ return true;
+ }
+
+ // If the original shift amount is now dead, delete it so that we don't run
+ // it through isel.
+ if (OrigShiftAmt.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
+
+ // Now that we've optimized the shift amount, defer to normal isel to get
+ // load folding and legacy vs BMI2 selection without repeating it here.
+ SelectCode(N);
+ return true;
+}
+
/// If the high bits of an 'and' operand are known zero, try setting the
/// high bits of an 'and' constant operand to produce a smaller encoding by
/// creating a small, sign-extended negative immediate rather than a large
@@ -2820,6 +2917,13 @@ void X86DAGToDAGISel::Select(SDNode *Nod
return;
}
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::SHL:
+ if (tryShiftAmountMod(Node))
+ return;
+ break;
+
case ISD::AND:
if (matchBEXTRFromAnd(Node))
return;
Modified: llvm/trunk/test/CodeGen/X86/clear-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/clear-lowbits.ll?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/clear-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/clear-lowbits.ll Wed Aug 22 12:39:09 2018
@@ -1108,7 +1108,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
; X86-NOBMI2-LABEL: clear_lowbits32_ic0:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT: movl $32, %ecx
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1117,7 +1117,7 @@ define i32 @clear_lowbits32_ic0(i32 %val
;
; X86-BMI2-LABEL: clear_lowbits32_ic0:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movl $32, %eax
+; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
@@ -1125,8 +1125,8 @@ define i32 @clear_lowbits32_ic0(i32 %val
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic0:
; X64-NOBMI2: # %bb.0:
-; X64-NOBMI2-NEXT: movl $32, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
@@ -1135,10 +1135,9 @@ define i32 @clear_lowbits32_ic0(i32 %val
;
; X64-BMI2-LABEL: clear_lowbits32_ic0:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $32, %eax
-; X64-BMI2-NEXT: subl %esi, %eax
-; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
-; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%numhighbits = sub i32 32, %numlowbits
%mask = shl i32 -1, %numhighbits
@@ -1150,15 +1149,16 @@ define i32 @clear_lowbits32_ic1_indexzex
; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT: movb $32, %cl
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb $32, %al
+; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
@@ -1166,19 +1166,19 @@ define i32 @clear_lowbits32_ic1_indexzex
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
-; X64-NOBMI2-NEXT: movb $32, %cl
-; X64-NOBMI2-NEXT: subb %sil, %cl
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrl %cl, %edi
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movb $32, %al
-; X64-BMI2-NEXT: subb %sil, %al
-; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
-; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT: negb %sil
+; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
@@ -1192,7 +1192,7 @@ define i32 @clear_lowbits32_ic2_load(i32
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
-; X86-NOBMI2-NEXT: movl $32, %ecx
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1202,7 +1202,7 @@ define i32 @clear_lowbits32_ic2_load(i32
; X86-BMI2-LABEL: clear_lowbits32_ic2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movl $32, %ecx
+; X86-BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
@@ -1210,9 +1210,9 @@ define i32 @clear_lowbits32_ic2_load(i32
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load:
; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl (%rdi), %eax
-; X64-NOBMI2-NEXT: movl $32, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
@@ -1220,10 +1220,9 @@ define i32 @clear_lowbits32_ic2_load(i32
;
; X64-BMI2-LABEL: clear_lowbits32_ic2_load:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $32, %eax
-; X64-BMI2-NEXT: subl %esi, %eax
-; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
-; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i32 32, %numlowbits
@@ -1237,16 +1236,17 @@ define i32 @clear_lowbits32_ic3_load_ind
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movl (%eax), %eax
-; X86-NOBMI2-NEXT: movb $32, %cl
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shrl %cl, %eax
+; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT: movb $32, %cl
+; X86-BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
@@ -1254,19 +1254,19 @@ define i32 @clear_lowbits32_ic3_load_ind
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl (%rdi), %eax
-; X64-NOBMI2-NEXT: movb $32, %cl
-; X64-NOBMI2-NEXT: subb %sil, %cl
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrl %cl, %eax
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movb $32, %al
-; X64-BMI2-NEXT: subb %sil, %al
-; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx
-; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT: negb %sil
+; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i8 32, %numlowbits
@@ -1280,7 +1280,7 @@ define i32 @clear_lowbits32_ic4_commutat
; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI2-NEXT: movl $32, %ecx
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx
; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1289,7 +1289,7 @@ define i32 @clear_lowbits32_ic4_commutat
;
; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movl $32, %eax
+; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
@@ -1297,8 +1297,8 @@ define i32 @clear_lowbits32_ic4_commutat
;
; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative:
; X64-NOBMI2: # %bb.0:
-; X64-NOBMI2-NEXT: movl $32, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrl %cl, %edi
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shll %cl, %edi
@@ -1307,10 +1307,9 @@ define i32 @clear_lowbits32_ic4_commutat
;
; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $32, %eax
-; X64-BMI2-NEXT: subl %esi, %eax
-; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx
-; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax
+; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: shrxl %esi, %edi, %eax
+; X64-BMI2-NEXT: shlxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
%numhighbits = sub i32 32, %numlowbits
%mask = shl i32 -1, %numhighbits
@@ -1358,20 +1357,19 @@ define i64 @clear_lowbits64_ic0(i64 %val
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic0:
; X64-NOBMI2: # %bb.0:
-; X64-NOBMI2-NEXT: movl $64, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
-; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic0:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $64, %eax
-; X64-BMI2-NEXT: subl %esi, %eax
-; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
-; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%numhighbits = sub i64 64, %numlowbits
%mask = shl i64 -1, %numhighbits
@@ -1417,19 +1415,20 @@ define i64 @clear_lowbits64_ic1_indexzex
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X64-NOBMI2: # %bb.0:
-; X64-NOBMI2-NEXT: movb $64, %cl
-; X64-NOBMI2-NEXT: subb %sil, %cl
+; X64-NOBMI2-NEXT: movl %esi, %ecx
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrq %cl, %rdi
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movb $64, %al
-; X64-BMI2-NEXT: subb %sil, %al
-; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
-; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT: negb %sil
+; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
@@ -1482,20 +1481,19 @@ define i64 @clear_lowbits64_ic2_load(i64
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load:
; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: movl $64, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rax
-; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic2_load:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $64, %eax
-; X64-BMI2-NEXT: subl %esi, %eax
-; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
-; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i64 64, %numlowbits
@@ -1548,19 +1546,20 @@ define i64 @clear_lowbits64_ic3_load_ind
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X64-NOBMI2: # %bb.0:
+; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
-; X64-NOBMI2-NEXT: movb $64, %cl
-; X64-NOBMI2-NEXT: subb %sil, %cl
+; X64-NOBMI2-NEXT: negb %cl
; X64-NOBMI2-NEXT: shrq %cl, %rax
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movb $64, %al
-; X64-BMI2-NEXT: subb %sil, %al
-; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx
-; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-BMI2-NEXT: negb %sil
+; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i8 64, %numlowbits
@@ -1608,20 +1607,19 @@ define i64 @clear_lowbits64_ic4_commutat
;
; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative:
; X64-NOBMI2: # %bb.0:
-; X64-NOBMI2-NEXT: movl $64, %ecx
-; X64-NOBMI2-NEXT: subl %esi, %ecx
+; X64-NOBMI2-NEXT: movq %rsi, %rcx
+; X64-NOBMI2-NEXT: negl %ecx
; X64-NOBMI2-NEXT: shrq %cl, %rdi
-; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shlq %cl, %rdi
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative:
; X64-BMI2: # %bb.0:
-; X64-BMI2-NEXT: movl $64, %eax
-; X64-BMI2-NEXT: subl %esi, %eax
-; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx
-; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax
+; X64-BMI2-NEXT: negl %esi
+; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax
+; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
%numhighbits = sub i64 64, %numlowbits
%mask = shl i64 -1, %numhighbits
Modified: llvm/trunk/test/CodeGen/X86/extract-lowbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-lowbits.ll?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-lowbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-lowbits.ll Wed Aug 22 12:39:09 2018
@@ -1016,7 +1016,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
; X86-NOBMI-LABEL: bzhi32_c0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: movl $32, %ecx
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1031,8 +1031,8 @@ define i32 @bzhi32_c0(i32 %val, i32 %num
;
; X64-NOBMI-LABEL: bzhi32_c0:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl $32, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
@@ -1053,9 +1053,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val
; X86-NOBMI-LABEL: bzhi32_c1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: movb $32, %cl
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
+; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@@ -1067,9 +1068,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val
;
; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movb $32, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %edi
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: retq
@@ -1090,7 +1092,7 @@ define i32 @bzhi32_c2_load(i32* %w, i32
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
-; X86-NOBMI-NEXT: movl $32, %ecx
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1106,9 +1108,9 @@ define i32 @bzhi32_c2_load(i32* %w, i32
;
; X64-NOBMI-LABEL: bzhi32_c2_load:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
-; X64-NOBMI-NEXT: movl $32, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
@@ -1130,9 +1132,10 @@ define i32 @bzhi32_c3_load_indexzext(i32
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
-; X86-NOBMI-NEXT: movb $32, %cl
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
+; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@@ -1145,10 +1148,11 @@ define i32 @bzhi32_c3_load_indexzext(i32
;
; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
-; X64-NOBMI-NEXT: movb $32, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
@@ -1168,7 +1172,7 @@ define i32 @bzhi32_c4_commutative(i32 %v
; X86-NOBMI-LABEL: bzhi32_c4_commutative:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: movl $32, %ecx
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1183,8 +1187,8 @@ define i32 @bzhi32_c4_commutative(i32 %v
;
; X64-NOBMI-LABEL: bzhi32_c4_commutative:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl $32, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
@@ -1241,10 +1245,10 @@ define i64 @bzhi64_c0(i64 %val, i64 %num
;
; X64-NOBMI-LABEL: bzhi64_c0:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl $64, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: movq %rsi, %rcx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rdi
-; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@@ -1297,9 +1301,10 @@ define i64 @bzhi64_c1_indexzext(i64 %val
;
; X64-NOBMI-LABEL: bzhi64_c1_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movb $64, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rdi
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@@ -1360,11 +1365,11 @@ define i64 @bzhi64_c2_load(i64* %w, i64
;
; X64-NOBMI-LABEL: bzhi64_c2_load:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq (%rdi), %rax
-; X64-NOBMI-NEXT: movl $64, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
-; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
@@ -1423,10 +1428,11 @@ define i64 @bzhi64_c3_load_indexzext(i64
;
; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq (%rdi), %rax
-; X64-NOBMI-NEXT: movb $64, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
@@ -1481,10 +1487,10 @@ define i64 @bzhi64_c4_commutative(i64 %v
;
; X64-NOBMI-LABEL: bzhi64_c4_commutative:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl $64, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: movq %rsi, %rcx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rdi
-; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@@ -1507,7 +1513,7 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
; X86-NOBMI-LABEL: bzhi32_d0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: movl $32, %ecx
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1522,8 +1528,8 @@ define i32 @bzhi32_d0(i32 %val, i32 %num
;
; X64-NOBMI-LABEL: bzhi32_d0:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl $32, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %edi
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
@@ -1544,9 +1550,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val
; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT: movb $32, %cl
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
+; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@@ -1558,9 +1565,10 @@ define i32 @bzhi32_d1_indexzext(i32 %val
;
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movb $32, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %edi
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %edi
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: retq
@@ -1581,7 +1589,7 @@ define i32 @bzhi32_d2_load(i32* %w, i32
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
-; X86-NOBMI-NEXT: movl $32, %ecx
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
@@ -1597,9 +1605,9 @@ define i32 @bzhi32_d2_load(i32* %w, i32
;
; X64-NOBMI-LABEL: bzhi32_d2_load:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
-; X64-NOBMI-NEXT: movl $32, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
@@ -1621,9 +1629,10 @@ define i32 @bzhi32_d3_load_indexzext(i32
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
-; X86-NOBMI-NEXT: movb $32, %cl
+; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
+; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
@@ -1636,10 +1645,11 @@ define i32 @bzhi32_d3_load_indexzext(i32
;
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
-; X64-NOBMI-NEXT: movb $32, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
@@ -1731,10 +1741,10 @@ define i64 @bzhi64_d0(i64 %val, i64 %num
;
; X64-NOBMI-LABEL: bzhi64_d0:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movl $64, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: movq %rsi, %rcx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rdi
-; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@@ -1823,9 +1833,10 @@ define i64 @bzhi64_d1_indexzext(i64 %val
;
; X64-NOBMI-LABEL: bzhi64_d1_indexzext:
; X64-NOBMI: # %bb.0:
-; X64-NOBMI-NEXT: movb $64, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: movl %esi, %ecx
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rdi
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rdi
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: retq
@@ -1918,11 +1929,11 @@ define i64 @bzhi64_d2_load(i64* %w, i64
;
; X64-NOBMI-LABEL: bzhi64_d2_load:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq (%rdi), %rax
-; X64-NOBMI-NEXT: movl $64, %ecx
-; X64-NOBMI-NEXT: subl %esi, %ecx
+; X64-NOBMI-NEXT: negl %ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
-; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
@@ -2013,10 +2024,11 @@ define i64 @bzhi64_d3_load_indexzext(i64
;
; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq (%rdi), %rax
-; X64-NOBMI-NEXT: movb $64, %cl
-; X64-NOBMI-NEXT: subb %sil, %cl
+; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shlq %cl, %rax
+; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll?rev=340441&r1=340440&r2=340441&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll (original)
+++ llvm/trunk/test/CodeGen/X86/schedule-x86-64-shld.ll Wed Aug 22 12:39:09 2018
@@ -162,11 +162,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
;
; BTVER2-LABEL: lshift_cl:
; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50]
+; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
+; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50]
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
@@ -174,11 +173,10 @@ define i64 @lshift_cl(i64 %a, i64 %b, i6
;
; BDVER1-LABEL: lshift_cl:
; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movl %edx, %ecx
+; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: shlq %cl, %rdi
-; BDVER1-NEXT: movl $64, %ecx
-; BDVER1-NEXT: subl %edx, %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
+; BDVER1-NEXT: negl %ecx
+; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shrq %cl, %rsi
; BDVER1-NEXT: orq %rdi, %rsi
; BDVER1-NEXT: movq %rsi, %rax
@@ -236,11 +234,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
;
; BTVER2-LABEL: rshift_cl:
; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50]
+; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
+; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50]
; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
@@ -248,11 +245,10 @@ define i64 @rshift_cl(i64 %a, i64 %b, i6
;
; BDVER1-LABEL: rshift_cl:
; BDVER1: # %bb.0: # %entry
-; BDVER1-NEXT: movl %edx, %ecx
+; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: shrq %cl, %rdi
-; BDVER1-NEXT: movl $64, %ecx
-; BDVER1-NEXT: subl %edx, %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
+; BDVER1-NEXT: negl %ecx
+; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shlq %cl, %rsi
; BDVER1-NEXT: orq %rdi, %rsi
; BDVER1-NEXT: movq %rsi, %rax
@@ -310,11 +306,10 @@ define void @lshift_mem_cl(i64 %a, i64 %
; BTVER2-LABEL: lshift_mem_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
-; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
+; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx
+; BTVER2-NEXT: negl %ecx # sched: [1:0.50]
+; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
@@ -322,12 +317,11 @@ define void @lshift_mem_cl(i64 %a, i64 %
;
; BDVER1-LABEL: lshift_mem_cl:
; BDVER1: # %bb.0: # %entry
+; BDVER1-NEXT: movq %rsi, %rcx
; BDVER1-NEXT: movq {{.*}}(%rip), %rax
-; BDVER1-NEXT: movl %esi, %ecx
; BDVER1-NEXT: shlq %cl, %rax
-; BDVER1-NEXT: movl $64, %ecx
-; BDVER1-NEXT: subl %esi, %ecx
-; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx
+; BDVER1-NEXT: negl %ecx
+; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shrq %cl, %rdi
; BDVER1-NEXT: orq %rax, %rdi
; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
More information about the llvm-commits
mailing list