[llvm] r348959 - [X86] Emit SBB instead of SETCC_CARRY from LowerSELECT. Break false dependency on the SBB input.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 12 11:20:22 PST 2018
Author: ctopper
Date: Wed Dec 12 11:20:21 2018
New Revision: 348959
URL: http://llvm.org/viewvc/llvm-project?rev=348959&view=rev
Log:
[X86] Emit SBB instead of SETCC_CARRY from LowerSELECT. Break false dependency on the SBB input.
I'm hoping we can just replace SETCC_CARRY with SBB. This is another step towards that.
I've explicitly used zero as both inputs to the SBB to avoid the false dependency that we've had with SETCC_CARRY. I changed one of the patterns that used NEG to instead use an explicit compare with 0 on the LHS. We needed the zero anyway to avoid the false dependency. The negate would clobber its input register; by using a CMP we can avoid that, which could be useful.
Differential Revision: https://reviews.llvm.org/D55414
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrCompiler.td
llvm/trunk/test/CodeGen/X86/pr35972.ll
llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
llvm/trunk/test/CodeGen/X86/select.ll
llvm/trunk/test/CodeGen/X86/shl-crash-on-legalize.ll
llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=348959&r1=348958&r2=348959&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Dec 12 11:20:21 2018
@@ -19802,22 +19802,21 @@ SDValue X86TargetLowering::LowerSELECT(S
// (select (x == 0), 0, -1) -> neg & sbb
if (isNullConstant(Y) &&
(isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) {
- SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32);
SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType());
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, Zero, CmpOp0);
- SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- return Res;
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Zero, CmpOp0);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ Zero = DAG.getConstant(0, DL, Op.getValueType());
+ return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
}
Cmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32,
CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType()));
Cmp = ConvertCmpIfNecessary(Cmp, DAG);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SDValue Zero = DAG.getConstant(0, DL, Op.getValueType());
SDValue Res = // Res = 0 or -1.
- DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
- DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp);
+ DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp);
if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E))
Res = DAG.getNOT(DL, Res, Res.getValueType());
Modified: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrCompiler.td?rev=348959&r1=348958&r2=348959&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td Wed Dec 12 11:20:21 2018
@@ -362,6 +362,21 @@ def : Pat<(i64 (sext (i8 (X86setcc_c X86
def : Pat<(and (i8 (X86setcc_c X86_COND_B, EFLAGS)), 1),
(SETBr)>;
+// Patterns to give priority when both inputs are zero so that we don't use
+// an immediate for the RHS.
+// TODO: Should we use a 32-bit sbb for 8/16 to push the extract_subreg out?
+def : Pat<(X86sbb_flag (i8 0), (i8 0), EFLAGS),
+ (SBB8rr (EXTRACT_SUBREG (MOV32r0), sub_8bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_8bit))>;
+def : Pat<(X86sbb_flag (i16 0), (i16 0), EFLAGS),
+ (SBB16rr (EXTRACT_SUBREG (MOV32r0), sub_16bit),
+ (EXTRACT_SUBREG (MOV32r0), sub_16bit))>;
+def : Pat<(X86sbb_flag (i32 0), (i32 0), EFLAGS),
+ (SBB32rr (MOV32r0), (MOV32r0))>;
+def : Pat<(X86sbb_flag (i64 0), (i64 0), EFLAGS),
+ (SBB64rr (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit),
+ (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit))>;
+
//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
Modified: llvm/trunk/test/CodeGen/X86/pr35972.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr35972.ll?rev=348959&r1=348958&r2=348959&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr35972.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr35972.ll Wed Dec 12 11:20:21 2018
@@ -5,6 +5,7 @@ define void @test3(i32 %c, <64 x i1>* %p
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: sbbl %ecx, %ecx
; CHECK-NEXT: kmovd %ecx, %k0
Modified: llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll?rev=348959&r1=348958&r2=348959&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll Wed Dec 12 11:20:21 2018
@@ -697,8 +697,8 @@ define i64 @test4(i64 %a, i64 %b) nounwi
; ILP-NEXT: cmpq %rdi, %rsi
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: movl $0, %edx
-; ILP-NEXT: sbbq $0, %rdx
-; ILP-NEXT: sbbq $0, %rcx
+; ILP-NEXT: sbbq %rdx, %rdx
+; ILP-NEXT: sbbq %rcx, %rcx
; ILP-NEXT: setae %cl
; ILP-NEXT: movzbl %cl, %ecx
; ILP-NEXT: subq %rcx, %rax
@@ -713,8 +713,8 @@ define i64 @test4(i64 %a, i64 %b) nounwi
; HYBRID-NEXT: cmpq %rdi, %rsi
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: movl $0, %ecx
-; HYBRID-NEXT: sbbq $0, %rcx
-; HYBRID-NEXT: sbbq $0, %rax
+; HYBRID-NEXT: sbbq %rcx, %rcx
+; HYBRID-NEXT: sbbq %rax, %rax
; HYBRID-NEXT: setae %al
; HYBRID-NEXT: movzbl %al, %ecx
; HYBRID-NEXT: movl $2, %eax
@@ -730,8 +730,8 @@ define i64 @test4(i64 %a, i64 %b) nounwi
; BURR-NEXT: cmpq %rdi, %rsi
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: movl $0, %ecx
-; BURR-NEXT: sbbq $0, %rcx
-; BURR-NEXT: sbbq $0, %rax
+; BURR-NEXT: sbbq %rcx, %rcx
+; BURR-NEXT: sbbq %rax, %rax
; BURR-NEXT: setae %al
; BURR-NEXT: movzbl %al, %ecx
; BURR-NEXT: movl $2, %eax
@@ -747,8 +747,8 @@ define i64 @test4(i64 %a, i64 %b) nounwi
; SRC-NEXT: cmpq %rdi, %rsi
; SRC-NEXT: sbbq $0, %rax
; SRC-NEXT: movl $0, %eax
-; SRC-NEXT: sbbq $0, %rax
-; SRC-NEXT: sbbq $0, %rcx
+; SRC-NEXT: sbbq %rax, %rax
+; SRC-NEXT: sbbq %rcx, %rcx
; SRC-NEXT: setae %al
; SRC-NEXT: movzbl %al, %ecx
; SRC-NEXT: movl $2, %eax
@@ -765,8 +765,8 @@ define i64 @test4(i64 %a, i64 %b) nounwi
; LIN-NEXT: cmpq %rdi, %rsi
; LIN-NEXT: sbbq $0, %rdx
; LIN-NEXT: movl $0, %edx
-; LIN-NEXT: sbbq $0, %rdx
-; LIN-NEXT: sbbq $0, %rcx
+; LIN-NEXT: sbbq %rdx, %rdx
+; LIN-NEXT: sbbq %rcx, %rcx
; LIN-NEXT: setae %cl
; LIN-NEXT: movzbl %cl, %ecx
; LIN-NEXT: subq %rcx, %rax
Modified: llvm/trunk/test/CodeGen/X86/select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select.ll?rev=348959&r1=348958&r2=348959&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select.ll Wed Dec 12 11:20:21 2018
@@ -624,21 +624,13 @@ define void @test8(i1 %c, <6 x i32>* %ds
;; Test integer select between values and constants.
define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq %rsi, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test9:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq %rsi, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test9:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test9:
; ATHLON: ## %bb.0:
@@ -672,21 +664,13 @@ define i64 @test9(i64 %x, i64 %y) nounwi
;; Same as test9
define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test9a:
-; GENERIC: ## %bb.0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq %rsi, %rax
-; GENERIC-NEXT: retq
-;
-; ATOM-LABEL: test9a:
-; ATOM: ## %bb.0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq %rsi, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
+; CHECK-LABEL: test9a:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpq $1, %rdi
+; CHECK-NEXT: sbbq %rax, %rax
+; CHECK-NEXT: orq %rsi, %rax
+; CHECK-NEXT: retq
;
; ATHLON-LABEL: test9a:
; ATHLON: ## %bb.0:
@@ -803,6 +787,7 @@ define i64 @test10(i64 %x, i64 %y) nounw
define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK-LABEL: test11:
; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $1, %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: notq %rax
@@ -842,6 +827,7 @@ define i64 @test11(i64 %x, i64 %y) nounw
define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK-LABEL: test11a:
; CHECK: ## %bb.0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $1, %rdi
; CHECK-NEXT: sbbq %rax, %rax
; CHECK-NEXT: notq %rax
Modified: llvm/trunk/test/CodeGen/X86/shl-crash-on-legalize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shl-crash-on-legalize.ll?rev=348959&r1=348958&r2=348959&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shl-crash-on-legalize.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shl-crash-on-legalize.ll Wed Dec 12 11:20:21 2018
@@ -15,6 +15,7 @@ define i32 @PR29058(i8 %x, i32 %y) {
; CHECK-NEXT: testb %dil, %dil
; CHECK-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE
; CHECK-NEXT: cmovnel %esi, %eax
+; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: cmpb $1, %dil
; CHECK-NEXT: sbbb %dl, %dl
; CHECK-NEXT: orb %dl, %cl
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll?rev=348959&r1=348958&r2=348959&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-any_of.ll Wed Dec 12 11:20:21 2018
@@ -50,8 +50,9 @@ define i64 @test_v4f64_sext(<4 x double>
; AVX-LABEL: test_v4f64_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskpd %ymm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskpd %ymm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbq %rax, %rax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -83,9 +84,10 @@ define i64 @test_v4f64_legal_sext(<4 x d
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: negl %eax
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: cltq
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl %eax, %ecx
+; SSE-NEXT: sbbl %ecx, %ecx
+; SSE-NEXT: movslq %ecx, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_legal_sext:
@@ -94,9 +96,10 @@ define i64 @test_v4f64_legal_sext(<4 x d
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: negl %eax
-; AVX-NEXT: sbbl %eax, %eax
-; AVX-NEXT: cltq
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl %eax, %ecx
+; AVX-NEXT: sbbl %ecx, %ecx
+; AVX-NEXT: movslq %ecx, %rax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
@@ -128,16 +131,18 @@ define i32 @test_v4f32_sext(<4 x float>
; SSE-LABEL: test_v4f32_sext:
; SSE: # %bb.0:
; SSE-NEXT: cmpltps %xmm0, %xmm1
-; SSE-NEXT: movmskps %xmm1, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm1, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
@@ -166,16 +171,18 @@ define i32 @test_v8f32_sext(<8 x float>
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
-; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm2, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX-NEXT: vmovmskps %ymm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskps %ymm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -210,8 +217,9 @@ define i32 @test_v8f32_legal_sext(<8 x f
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
-; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm2, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@@ -220,8 +228,9 @@ define i32 @test_v8f32_legal_sext(<8 x f
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
@@ -303,8 +312,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskpd %ymm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vmovmskpd %ymm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -312,8 +322,9 @@ define i64 @test_v4i64_sext(<4 x i64> %a
; AVX2-LABEL: test_v4i64_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskpd %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vmovmskpd %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -345,9 +356,10 @@ define i64 @test_v4i64_legal_sext(<4 x i
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: negl %eax
-; SSE-NEXT: sbbl %eax, %eax
-; SSE-NEXT: cltq
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl %eax, %ecx
+; SSE-NEXT: sbbl %ecx, %ecx
+; SSE-NEXT: movslq %ecx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v4i64_legal_sext:
@@ -358,9 +370,10 @@ define i64 @test_v4i64_legal_sext(<4 x i
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vmovmskps %xmm0, %eax
-; AVX1-NEXT: negl %eax
-; AVX1-NEXT: sbbl %eax, %eax
-; AVX1-NEXT: cltq
+; AVX1-NEXT: xorl %ecx, %ecx
+; AVX1-NEXT: cmpl %eax, %ecx
+; AVX1-NEXT: sbbl %ecx, %ecx
+; AVX1-NEXT: movslq %ecx, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -370,9 +383,10 @@ define i64 @test_v4i64_legal_sext(<4 x i
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm0, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: sbbl %eax, %eax
-; AVX2-NEXT: cltq
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: cmpl %eax, %ecx
+; AVX2-NEXT: sbbl %ecx, %ecx
+; AVX2-NEXT: movslq %ecx, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -404,16 +418,18 @@ define i32 @test_v4i32_sext(<4 x i32> %a
; SSE-LABEL: test_v4i32_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovmskps %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vmovmskps %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
@@ -442,8 +458,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: movmskps %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@@ -454,8 +471,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vmovmskps %ymm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vmovmskps %ymm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -463,8 +481,9 @@ define i32 @test_v8i32_sext(<8 x i32> %a
; AVX2-LABEL: test_v8i32_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovmskps %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vmovmskps %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -499,8 +518,9 @@ define i32 @test_v8i32_legal_sext(<8 x i
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
@@ -511,8 +531,9 @@ define i32 @test_v8i32_legal_sext(<8 x i
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -522,8 +543,9 @@ define i32 @test_v8i32_legal_sext(<8 x i
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %xmm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -559,8 +581,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a
; SSE-LABEL: test_v8i16_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@@ -568,8 +591,9 @@ define i16 @test_v8i16_sext(<8 x i16> %a
; AVX-LABEL: test_v8i16_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-NEXT: retq
@@ -604,8 +628,9 @@ define i16 @test_v16i16_sext(<16 x i16>
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@@ -632,8 +657,9 @@ define i16 @test_v16i16_sext(<16 x i16>
; AVX2-LABEL: test_v16i16_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -674,8 +700,9 @@ define i16 @test_v16i16_legal_sext(<16 x
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
; SSE-NEXT: retq
@@ -687,8 +714,9 @@ define i16 @test_v16i16_legal_sext(<16 x
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: negl %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %ecx
+; AVX1-NEXT: xorl %eax, %eax
+; AVX1-NEXT: cmpl %ecx, %eax
; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
; AVX1-NEXT: vzeroupper
@@ -699,8 +727,9 @@ define i16 @test_v16i16_legal_sext(<16 x
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %xmm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
; AVX2-NEXT: vzeroupper
@@ -742,8 +771,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0
; SSE-LABEL: test_v16i8_sext:
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
@@ -751,8 +781,9 @@ define i8 @test_v16i8_sext(<16 x i8> %a0
; AVX-LABEL: test_v16i8_sext:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: negl %eax
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: cmpl %ecx, %eax
; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
@@ -791,8 +822,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: negl %eax
+; SSE-NEXT: pmovmskb %xmm0, %ecx
+; SSE-NEXT: xorl %eax, %eax
+; SSE-NEXT: cmpl %ecx, %eax
; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: def $al killed $al killed $eax
; SSE-NEXT: retq
@@ -821,8 +853,9 @@ define i8 @test_v32i8_sext(<32 x i8> %a0
; AVX2-LABEL: test_v32i8_sext:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpmovmskb %ymm0, %eax
-; AVX2-NEXT: negl %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl %ecx, %eax
; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: def $al killed $al killed $eax
; AVX2-NEXT: vzeroupper
More information about the llvm-commits mailing list