[llvm] r297586 - [x86] don't blindly transform SETB into SBB
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 12 11:28:49 PDT 2017
Author: spatel
Date: Sun Mar 12 13:28:48 2017
New Revision: 297586
URL: http://llvm.org/viewvc/llvm-project?rev=297586&view=rev
Log:
[x86] don't blindly transform SETB into SBB
I noticed unnecessary 'sbb' instructions in D30472 and while looking at 'ptest' codegen recently.
This happens because we were transforming any 'setb' - even when we only wanted a single-bit result.
This patch moves those transforms under visitAdd/visitSub, so we're only creating sbb/adc when it
is a win. I don't know why we need a SETCC_CARRY node type, but I'm not proposing to change that
existing behavior in this patch.
Also, I'm skeptical that sbb/adc are a win for all micro-arches, so I added comments to the test files
where this transform still fires.
The test changes here are all cases where we no longer produce sbb/adc. Avoiding partial register
stalls (generating an xor to clear a register) is not handled in some cases, but that's a separate
issue.
Differential Revision: https://reviews.llvm.org/D30611
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/add-of-carry.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/ctpop-combine.ll
llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll
llvm/trunk/test/CodeGen/X86/peep-setb.ll
llvm/trunk/test/CodeGen/X86/pr26350.ll
llvm/trunk/test/CodeGen/X86/setcc.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
llvm/trunk/test/CodeGen/X86/sse41.ll
llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Mar 12 13:28:48 2017
@@ -33916,21 +33916,6 @@ static SDValue combineGatherScatter(SDNo
return SDValue();
}
-/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
-/// which is more useful than 0/1 in some cases.
-static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
- SDLoc DL(N);
- // "Condition code B" is also known as "the carry flag" (CF).
- SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
- SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
- MVT VT = N->getSimpleValueType(0);
- if (VT == MVT::i8)
- return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
-
- assert(VT == MVT::i1 && "Unexpected type for SETCC node");
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
-}
-
// Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT
static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -33938,27 +33923,6 @@ static SDValue combineX86SetCC(SDNode *N
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0));
SDValue EFLAGS = N->getOperand(1);
- if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
- // materializing "setb reg".
- //
- // Do not flip "e > c", where "c" is a constant, because Cmp instruction
- // cannot take an immediate as its first operand.
- //
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
- EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- return materializeSBB(N, NewEFLAGS, DAG);
- }
- }
-
- if (CC == X86::COND_B)
- return materializeSBB(N, EFLAGS, DAG);
-
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
return getSETCC(CC, Flags, DL, DAG);
@@ -34153,6 +34117,21 @@ static SDValue combineADC(SDNode *N, Sel
return SDValue();
}
+/// Materialize "setb reg" as "sbb reg,reg", since it produces an all-ones bit
+/// which is more useful than 0/1 in some cases.
+static SDValue materializeSBB(SDNode *N, SDValue EFLAGS, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ // "Condition code B" is also known as "the carry flag" (CF).
+ SDValue CF = DAG.getConstant(X86::COND_B, DL, MVT::i8);
+ SDValue SBB = DAG.getNode(X86ISD::SETCC_CARRY, DL, MVT::i8, CF, EFLAGS);
+ MVT VT = N->getSimpleValueType(0);
+ if (VT == MVT::i8)
+ return DAG.getNode(ISD::AND, DL, VT, SBB, DAG.getConstant(1, DL, VT));
+
+ assert(VT == MVT::i1 && "Unexpected type for SETCC node");
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SBB);
+}
+
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
@@ -34180,7 +34159,42 @@ static SDValue combineAddOrSubToADCOrSBB
if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
return SDValue();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
+
+ if (CC == X86::COND_B) {
+ // X + SETB Z --> X + (mask SBB Z, Z)
+ // X - SETB Z --> X - (mask SBB Z, Z)
+ // TODO: Produce ADC/SBB here directly and avoid SETCC_CARRY?
+ SDValue SBB = materializeSBB(Y.getNode(), Y.getOperand(1), DAG);
+ if (SBB.getValueSizeInBits() != VT.getSizeInBits())
+ SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
+ return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ }
+
+ if (CC == X86::COND_A) {
+ SDValue EFLAGS = Y->getOperand(1);
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+ // Do not flip "e > c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
+ EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ SDValue SBB = materializeSBB(Y.getNode(), NewEFLAGS, DAG);
+ if (SBB.getValueSizeInBits() != VT.getSizeInBits())
+ SBB = DAG.getZExtOrTrunc(SBB, DL, VT);
+ return DAG.getNode(IsSub ? ISD::SUB : ISD::ADD, DL, VT, X, SBB);
+ }
+ }
+
if (CC != X86::COND_E && CC != X86::COND_NE)
return SDValue();
@@ -34190,9 +34204,6 @@ static SDValue combineAddOrSubToADCOrSBB
!Cmp.getOperand(0).getValueType().isInteger())
return SDValue();
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
// (cmp Z, 1) sets the carry flag if Z is 0.
SDValue Z = Cmp.getOperand(0);
SDValue NewCmp = DAG.getNode(X86ISD::CMP, DL, MVT::i32, Z,
Modified: llvm/trunk/test/CodeGen/X86/add-of-carry.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/add-of-carry.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/add-of-carry.ll (original)
+++ llvm/trunk/test/CodeGen/X86/add-of-carry.ll Sun Mar 12 13:28:48 2017
@@ -1,6 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s
+; These tests use adc/sbb in place of set+add/sub. Should this transform
+; be enabled by micro-architecture rather than as part of generic lowering/isel?
+
; <rdar://problem/8449754>
define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Sun Mar 12 13:28:48 2017
@@ -3310,16 +3310,16 @@ define <8 x float> @test_mm256_sub_ps(<8
define i32 @test_mm_testc_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_testc_pd:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testc_pd:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
@@ -3329,17 +3329,17 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x
define i32 @test_mm256_testc_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_testc_pd:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %ymm1, %ymm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_pd:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %ymm1, %ymm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
@@ -3350,16 +3350,16 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(
define i32 @test_mm_testc_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_testc_ps:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testc_ps:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
@@ -3369,17 +3369,17 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x
define i32 @test_mm256_testc_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_testc_ps:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %ymm1, %ymm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_ps:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %ymm1, %ymm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
@@ -3390,17 +3390,17 @@ declare i32 @llvm.x86.avx.vtestc.ps.256(
define i32 @test_mm256_testc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_testc_si256:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vptest %ymm1, %ymm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_si256:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vptest %ymm1, %ymm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-x86.ll Sun Mar 12 13:28:48 2017
@@ -497,9 +497,9 @@ declare i32 @llvm.x86.avx.movmsk.ps.256(
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_x86_avx_ptestc_256:
; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT: vptest %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x17,0xc1]
-; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
@@ -746,9 +746,9 @@ declare <8 x float> @llvm.x86.avx.vpermi
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd:
; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0f,0xc1]
-; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -759,9 +759,9 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x
define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT: vtestpd %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0f,0xc1]
-; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
@@ -773,9 +773,9 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(
define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps:
; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0e,0xc1]
-; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
ret i32 %res
@@ -786,9 +786,9 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x
define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; CHECK-NEXT: vtestps %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0e,0xc1]
-; CHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; CHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; CHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Sun Mar 12 13:28:48 2017
@@ -334,7 +334,7 @@ define i16 @test13(i32 %a, i32 %b) {
; KNL-LABEL: test13:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: sbbb %al, %al
+; KNL-NEXT: setb %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: movw $-4, %ax
@@ -348,7 +348,7 @@ define i16 @test13(i32 %a, i32 %b) {
; SKX-LABEL: test13:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: sbbb %al, %al
+; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: movw $-4, %ax
@@ -1122,135 +1122,137 @@ define i32 @test_insertelement_v32i1(i32
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp
+; KNL-NEXT: xorl %eax, %eax
; KNL-NEXT: cmpl %esi, %edi
+; KNL-NEXT: setb %al
; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm1
-; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: vmovd %edx, %xmm1
+; KNL-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1
; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
+; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %ecx
-; KNL-NEXT: vmovd %ecx, %xmm0
-; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: vmovd %edx, %xmm0
+; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
-; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k1, %ecx
+; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; KNL-NEXT: kshiftrw $15, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; KNL-NEXT: kmovw %k0, %ecx
+; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
@@ -1261,8 +1263,6 @@ define i32 @test_insertelement_v32i1(i32
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; KNL-NEXT: sbbl %eax, %eax
-; KNL-NEXT: andl $1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
@@ -1276,7 +1276,7 @@ define i32 @test_insertelement_v32i1(i32
; SKX-LABEL: test_insertelement_v32i1:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: sbbb %al, %al
+; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k1
@@ -1301,7 +1301,7 @@ define i8 @test_iinsertelement_v4i1(i32
; KNL-LABEL: test_iinsertelement_v4i1:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: sbbb %al, %al
+; KNL-NEXT: setb %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
@@ -1341,7 +1341,7 @@ define i8 @test_iinsertelement_v4i1(i32
; SKX-LABEL: test_iinsertelement_v4i1:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: sbbb %al, %al
+; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k1
@@ -1364,7 +1364,7 @@ define i8 @test_iinsertelement_v2i1(i32
; KNL-LABEL: test_iinsertelement_v2i1:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: sbbb %al, %al
+; KNL-NEXT: setb %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
@@ -1386,7 +1386,7 @@ define i8 @test_iinsertelement_v2i1(i32
; SKX-LABEL: test_iinsertelement_v2i1:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: sbbb %al, %al
+; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Mar 12 13:28:48 2017
@@ -21,9 +21,9 @@ define i32 @test_kortestc(i16 %a0, i16 %
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: kortestw %k0, %k1
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/ctpop-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ctpop-combine.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ctpop-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/ctpop-combine.ll Sun Mar 12 13:28:48 2017
@@ -36,11 +36,11 @@ define i32 @test2(i64 %x) nounwind readn
define i32 @test3(i64 %x) nounwind readnone {
; CHECK-LABEL: test3:
; CHECK: # BB#0:
-; CHECK-NEXT: popcntq %rdi, %rax
-; CHECK-NEXT: andb $63, %al
-; CHECK-NEXT: cmpb $2, %al
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: popcntq %rdi, %rcx
+; CHECK-NEXT: andb $63, %cl
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpb $2, %cl
+; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cast = trunc i64 %count to i6 ; Too small for 0-64
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-cmp.ll Sun Mar 12 13:28:48 2017
@@ -301,8 +301,8 @@ define zeroext i1 @icmp_ne(i32 %x, i32 %
define zeroext i1 @icmp_ugt(i32 %x, i32 %y) {
; SDAG-LABEL: icmp_ugt:
; SDAG: ## BB#0:
-; SDAG-NEXT: cmpl %edi, %esi
-; SDAG-NEXT: setb %al
+; SDAG-NEXT: cmpl %esi, %edi
+; SDAG-NEXT: seta %al
; SDAG-NEXT: retq
;
; FAST-LABEL: icmp_ugt:
Modified: llvm/trunk/test/CodeGen/X86/peep-setb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/peep-setb.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/peep-setb.ll (original)
+++ llvm/trunk/test/CodeGen/X86/peep-setb.ll Sun Mar 12 13:28:48 2017
@@ -1,6 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s
+; These tests use cmp+adc/sbb in place of test+set+add/sub. Should this transform
+; be enabled by micro-architecture rather than as part of generic lowering/isel?
+
define i8 @test1(i8 %a, i8 %b) nounwind {
; CHECK-LABEL: test1:
; CHECK: # BB#0:
Modified: llvm/trunk/test/CodeGen/X86/pr26350.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr26350.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr26350.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr26350.ll Sun Mar 12 13:28:48 2017
@@ -15,8 +15,8 @@ define i32 @main() {
; CHECK-NEXT: andl $16, %eax
; CHECK-NEXT: cmpl $-1, %eax
; CHECK-NEXT: sbbl $0, %ecx
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $1, %eax
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: retl
entry:
%load = load i32, i32* @d, align 4
Modified: llvm/trunk/test/CodeGen/X86/setcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/setcc.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/setcc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/setcc.ll Sun Mar 12 13:28:48 2017
@@ -21,9 +21,10 @@ define zeroext i16 @t1(i16 zeroext %x) n
define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp {
; CHECK-LABEL: t2:
; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $26, %edi
-; CHECK-NEXT: sbbl %eax, %eax
-; CHECK-NEXT: andl $32, %eax
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: shll $5, %eax
; CHECK-NEXT: retq
%t0 = icmp ult i16 %x, 26
%if = select i1 %t0, i16 32, i16 0
@@ -33,9 +34,10 @@ define zeroext i16 @t2(i16 zeroext %x) n
define i64 @t3(i64 %x) nounwind readnone ssp {
; CHECK-LABEL: t3:
; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpq $18, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: andl $64, %eax
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: shlq $6, %rax
; CHECK-NEXT: retq
%t0 = icmp ult i64 %x, 18
%if = select i1 %t0, i64 64, i64 0
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll Sun Mar 12 13:28:48 2017
@@ -898,17 +898,17 @@ define i32 @test_mm_test_all_ones(<2 x i
; X32-LABEL: test_mm_test_all_ones:
; X32: # BB#0:
; X32-NEXT: pcmpeqd %xmm1, %xmm1
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ptest %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_test_all_ones:
; X64: # BB#0:
; X64-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ptest %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> <i64 -1, i64 -1>)
ret i32 %res
@@ -956,16 +956,16 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2
define i32 @test_mm_testc_si128(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_testc_si128:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ptest %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testc_si128:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ptest %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-x86.ll Sun Mar 12 13:28:48 2017
@@ -362,16 +362,16 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; SSE41-LABEL: test_x86_sse41_ptestc:
; SSE41: ## BB#0:
+; SSE41-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE41-NEXT: ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1]
-; SSE41-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; SSE41-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; SSE41-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; SSE41-NEXT: retl ## encoding: [0xc3]
;
; VCHECK-LABEL: test_x86_sse41_ptestc:
; VCHECK: ## BB#0:
+; VCHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; VCHECK-NEXT: vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1]
-; VCHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; VCHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; VCHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; VCHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Sun Mar 12 13:28:48 2017
@@ -228,16 +228,16 @@ define i32 @ptestz_1(<2 x i64> %t1, <2 x
define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
; X32-LABEL: ptestz_2:
; X32: ## BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: ptest %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: ptestz_2:
; X64: ## BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ptest %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
ret i32 %tmp1
Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll Sun Mar 12 13:28:48 2017
@@ -33,23 +33,27 @@ define i32 @test_mm_cmpestra(<2 x i64> %
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
-define i32 @test_mm_cmpestrc(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) {
+define i32 @test_mm_cmpestrc(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind {
; X32-LABEL: test_mm_cmpestrc:
; X32: # BB#0:
+; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: xorl %ebx, %ebx
; X32-NEXT: pcmpestri $7, %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %bl
+; X32-NEXT: movl %ebx, %eax
+; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpestrc:
; X64: # BB#0:
+; X64-NEXT: xorl %r8d, %r8d
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movl %esi, %edx
; X64-NEXT: pcmpestri $7, %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %r8b
+; X64-NEXT: movl %r8d, %eax
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg2 = bitcast <2 x i64> %a2 to <16 x i8>
@@ -229,16 +233,16 @@ declare i32 @llvm.x86.sse42.pcmpistria12
define i32 @test_mm_cmpistrc(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_cmpistrc:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cmpistrc:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: pcmpistri $7, %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
Modified: llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll?rev=297586&r1=297585&r2=297586&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse42-intrinsics-x86.ll Sun Mar 12 13:28:48 2017
@@ -95,23 +95,29 @@ define i32 @test_x86_sse42_pcmpestria128
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
-define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
+define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) nounwind {
; SSE42-LABEL: test_x86_sse42_pcmpestric128:
; SSE42: ## BB#0:
+; SSE42-NEXT: pushl %ebx ## encoding: [0x53]
; SSE42-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00]
; SSE42-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00]
+; SSE42-NEXT: xorl %ebx, %ebx ## encoding: [0x31,0xdb]
; SSE42-NEXT: pcmpestri $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x61,0xc1,0x07]
-; SSE42-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; SSE42-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; SSE42-NEXT: setb %bl ## encoding: [0x0f,0x92,0xc3]
+; SSE42-NEXT: movl %ebx, %eax ## encoding: [0x89,0xd8]
+; SSE42-NEXT: popl %ebx ## encoding: [0x5b]
; SSE42-NEXT: retl ## encoding: [0xc3]
;
; VCHECK-LABEL: test_x86_sse42_pcmpestric128:
; VCHECK: ## BB#0:
+; VCHECK-NEXT: pushl %ebx ## encoding: [0x53]
; VCHECK-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00]
; VCHECK-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00]
+; VCHECK-NEXT: xorl %ebx, %ebx ## encoding: [0x31,0xdb]
; VCHECK-NEXT: vpcmpestri $7, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x61,0xc1,0x07]
-; VCHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; VCHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; VCHECK-NEXT: setb %bl ## encoding: [0x0f,0x92,0xc3]
+; VCHECK-NEXT: movl %ebx, %eax ## encoding: [0x89,0xd8]
+; VCHECK-NEXT: popl %ebx ## encoding: [0x5b]
; VCHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
ret i32 %res
@@ -326,16 +332,16 @@ declare i32 @llvm.x86.sse42.pcmpistria12
define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
; SSE42-LABEL: test_x86_sse42_pcmpistric128:
; SSE42: ## BB#0:
+; SSE42-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE42-NEXT: pcmpistri $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x63,0xc1,0x07]
-; SSE42-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; SSE42-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; SSE42-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; SSE42-NEXT: retl ## encoding: [0xc3]
;
; VCHECK-LABEL: test_x86_sse42_pcmpistric128:
; VCHECK: ## BB#0:
+; VCHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
; VCHECK-NEXT: vpcmpistri $7, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x63,0xc1,0x07]
-; VCHECK-NEXT: sbbl %eax, %eax ## encoding: [0x19,0xc0]
-; VCHECK-NEXT: andl $1, %eax ## encoding: [0x83,0xe0,0x01]
+; VCHECK-NEXT: setb %al ## encoding: [0x0f,0x92,0xc0]
; VCHECK-NEXT: retl ## encoding: [0xc3]
%res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
ret i32 %res
More information about the llvm-commits mailing list