[llvm] ee0d46a - [X86] Transform `(cmp eq/ne trunc(X), C)` -> `(cmp eq/ne X, Zext(C))`
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 23 00:26:21 PDT 2023
Author: Noah Goldstein
Date: 2023-04-23T02:26:11-05:00
New Revision: ee0d46ae5569fa433c9ca001ec2dc7cb412b95a4
URL: https://github.com/llvm/llvm-project/commit/ee0d46ae5569fa433c9ca001ec2dc7cb412b95a4
DIFF: https://github.com/llvm/llvm-project/commit/ee0d46ae5569fa433c9ca001ec2dc7cb412b95a4.diff
LOG: [X86] Transform `(cmp eq/ne trunc(X), C)` -> `(cmp eq/ne X, Zext(C))`
This previously existed for `C == 0`, but is mostly beneficial for any
`C`.
There is a slight codesize cost as we get more imm32 (as opposed to
imm8) constants in some cases. But the benefit is that we get fewer imm16
constants (LCP stalls) and save instructions in some vec -> scalar
codegen.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D148594
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/movmsk-cmp.ll
llvm/test/CodeGen/X86/setcc-logic.ll
llvm/test/CodeGen/X86/vector-compare-all_of.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 533542f46477..38916f55f2c4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54481,21 +54481,24 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
- // cmpeq(trunc(x),0) --> cmpeq(x,0)
- // cmpne(trunc(x),0) --> cmpne(x,0)
+ // cmpeq(trunc(x),C) --> cmpeq(x,C)
+ // cmpne(trunc(x),C) --> cmpne(x,C)
// iff x upper bits are zero.
- // TODO: Add support for RHS to be truncate as well?
if (LHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
- isNullConstant(RHS) && !DCI.isBeforeLegalize()) {
+ isa<ConstantSDNode>(RHS) && !DCI.isBeforeLegalize()) {
EVT SrcVT = LHS.getOperand(0).getValueType();
APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
OpVT.getScalarSizeInBits());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto *C = cast<ConstantSDNode>(RHS);
if (DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) &&
TLI.isTypeLegal(LHS.getOperand(0).getValueType()))
return DAG.getSetCC(DL, VT, LHS.getOperand(0),
- DAG.getConstant(0, DL, SrcVT), CC);
+ DAG.getConstant(C->getAPIntValue().zextOrTrunc(
+ SrcVT.getScalarSizeInBits()),
+ DL, SrcVT),
+ CC);
}
// With C as a power of 2 and C != 0 and C != INT_MIN:
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 708d3cda667e..6db6b7bc4dc1 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -10,14 +10,14 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) {
; SSE-LABEL: allones_v16i8_sign:
; SSE: # %bb.0:
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
; AVX-LABEL: allones_v16i8_sign:
; AVX: # %bb.0:
; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: cmpw $-1, %ax
+; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT: sete %al
; AVX-NEXT: retq
%tmp = icmp slt <16 x i8> %arg, zeroinitializer
@@ -51,7 +51,7 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -60,7 +60,7 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -133,7 +133,7 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) {
; SSE-NEXT: pand %xmm3, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -145,7 +145,7 @@ define i1 @allones_v64i8_sign(<64 x i8> %arg) {
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -322,7 +322,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -331,7 +331,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -341,7 +341,7 @@ define i1 @allones_v16i16_sign(<16 x i16> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: cmpw $-1, %ax
+; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -428,7 +428,7 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -440,7 +440,7 @@ define i1 @allones_v32i16_sign(<32 x i16> %arg) {
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -549,7 +549,7 @@ define i1 @allones_v4i32_sign(<4 x i32> %arg) {
; SSE-LABEL: allones_v4i32_sign:
; SSE: # %bb.0:
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -652,7 +652,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -664,7 +664,7 @@ define i1 @allones_v16i32_sign(<16 x i32> %arg) {
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -766,7 +766,7 @@ define i1 @allones_v4i64_sign(<4 x i64> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -937,7 +937,7 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -945,7 +945,7 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
-; AVX1OR2-NEXT: cmpw $-1, %ax
+; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1OR2-NEXT: sete %al
; AVX1OR2-NEXT: retq
;
@@ -953,7 +953,7 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) {
; KNL: # %bb.0:
; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
; KNL-NEXT: vpmovmskb %xmm0, %eax
-; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
@@ -1138,7 +1138,7 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1148,7 +1148,7 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) {
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1223,7 +1223,7 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: psllw $7, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1236,7 +1236,7 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1405,7 +1405,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; SSE-NEXT: psllw $15, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1416,7 +1416,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1427,7 +1427,7 @@ define i1 @allones_v16i16_and1(<16 x i16> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: cmpw $-1, %ax
+; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1468,7 +1468,7 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) {
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1484,7 +1484,7 @@ define i1 @allones_v32i16_and1(<32 x i16> %arg) {
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1619,7 +1619,7 @@ define i1 @allones_v4i32_and1(<4 x i32> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: pslld $31, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1809,7 +1809,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1825,7 +1825,7 @@ define i1 @allones_v16i32_and1(<16 x i32> %arg) {
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1914,7 +1914,7 @@ define i1 @allones_v2i64_and1(<2 x i64> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: psllq $63, %xmm0
; SSE-NEXT: movmskpd %xmm0, %eax
-; SSE-NEXT: cmpb $3, %al
+; SSE-NEXT: cmpl $3, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1998,7 +1998,7 @@ define i1 @allones_v4i64_and1(<4 x i64> %arg) {
; SSE-NEXT: psllq $63, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2218,7 +2218,7 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: psllw $5, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2226,7 +2226,7 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
-; AVX1OR2-NEXT: cmpw $-1, %ax
+; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1OR2-NEXT: sete %al
; AVX1OR2-NEXT: retq
;
@@ -2234,7 +2234,7 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) {
; KNL: # %bb.0:
; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
; KNL-NEXT: vpmovmskb %xmm0, %eax
-; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
@@ -2284,7 +2284,7 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: psllw $5, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2294,7 +2294,7 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) {
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2369,7 +2369,7 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: psllw $5, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2382,7 +2382,7 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
; AVX1-NEXT: vpand %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vpsllw $5, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2551,7 +2551,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; SSE-NEXT: psllw $13, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2562,7 +2562,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; AVX1-NEXT: vpsllw $13, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2573,7 +2573,7 @@ define i1 @allones_v16i16_and4(<16 x i16> %arg) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: cmpw $-1, %ax
+; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT: sete %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -2614,7 +2614,7 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) {
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2630,7 +2630,7 @@ define i1 @allones_v32i16_and4(<32 x i16> %arg) {
; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -2765,7 +2765,7 @@ define i1 @allones_v4i32_and4(<4 x i32> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: pslld $29, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2955,7 +2955,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -2971,7 +2971,7 @@ define i1 @allones_v16i32_and4(<16 x i32> %arg) {
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: cmpw $-1, %ax
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT: sete %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -3060,7 +3060,7 @@ define i1 @allones_v2i64_and4(<2 x i64> %arg) {
; SSE: # %bb.0:
; SSE-NEXT: psllq $61, %xmm0
; SSE-NEXT: movmskpd %xmm0, %eax
-; SSE-NEXT: cmpb $3, %al
+; SSE-NEXT: cmpl $3, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -3144,7 +3144,7 @@ define i1 @allones_v4i64_and4(<4 x i64> %arg) {
; SSE-NEXT: psllq $61, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -3723,8 +3723,7 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %eax
-; SSE2-NEXT: xorl $3, %eax
-; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@@ -3732,17 +3731,14 @@ define i1 @movmsk_and_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE41: # %bb.0:
; SSE41-NEXT: pcmpeqq %xmm1, %xmm0
; SSE41-NEXT: movmskpd %xmm0, %eax
-; SSE41-NEXT: xorl $3, %eax
-; SSE41-NEXT: cmpb $3, %al
+; SSE41-NEXT: testl %eax, %eax
; SSE41-NEXT: sete %al
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: movmsk_and_v2i64:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT: xorl $3, %eax
-; AVX1OR2-NEXT: cmpb $3, %al
+; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
; AVX1OR2-NEXT: sete %al
; AVX1OR2-NEXT: retq
;
@@ -3854,7 +3850,7 @@ define i1 @movmsk_and_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE: # %bb.0:
; SSE-NEXT: cmplepd %xmm0, %xmm1
; SSE-NEXT: movmskpd %xmm1, %eax
-; SSE-NEXT: cmpb $3, %al
+; SSE-NEXT: cmpl $3, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -4282,7 +4278,7 @@ define i32 @PR39665_c_ray_opt(<2 x double> %x, <2 x double> %y) {
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm0, %xmm1
; SSE-NEXT: movmskpd %xmm1, %eax
-; SSE-NEXT: cmpb $3, %al
+; SSE-NEXT: cmpl $3, %eax
; SSE-NEXT: movl $42, %ecx
; SSE-NEXT: movl $99, %eax
; SSE-NEXT: cmovel %ecx, %eax
diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll
index baad9698472a..933295d8d1aa 100644
--- a/llvm/test/CodeGen/X86/setcc-logic.ll
+++ b/llvm/test/CodeGen/X86/setcc-logic.ll
@@ -324,7 +324,7 @@ define i32 @vec_extract_branch(<2 x double> %x) {
; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: cmpltpd %xmm0, %xmm1
; CHECK-NEXT: movmskpd %xmm1, %eax
-; CHECK-NEXT: cmpb $3, %al
+; CHECK-NEXT: cmpl $3, %eax
; CHECK-NEXT: jne .LBB16_2
; CHECK-NEXT: # %bb.1: # %true
; CHECK-NEXT: movl $42, %eax
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index 88921340b337..f152767f8478 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -957,7 +957,7 @@ define i1 @bool_reduction_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE: # %bb.0:
; SSE-NEXT: cmpltpd %xmm0, %xmm1
; SSE-NEXT: movmskpd %xmm1, %eax
-; SSE-NEXT: cmpb $3, %al
+; SSE-NEXT: cmpl $3, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -988,7 +988,7 @@ define i1 @bool_reduction_v4f32(<4 x float> %x, <4 x float> %y) {
; SSE: # %bb.0:
; SSE-NEXT: cmpeqps %xmm1, %xmm0
; SSE-NEXT: movmskps %xmm0, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1023,7 +1023,7 @@ define i1 @bool_reduction_v4f64(<4 x double> %x, <4 x double> %y) {
; SSE-NEXT: cmplepd %xmm0, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: movmskps %xmm2, %eax
-; SSE-NEXT: cmpb $15, %al
+; SSE-NEXT: cmpl $15, %eax
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1120,8 +1120,7 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: movmskpd %xmm1, %eax
-; SSE2-NEXT: xorl $3, %eax
-; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: testl %eax, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@@ -1129,17 +1128,14 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
; SSE42: # %bb.0:
; SSE42-NEXT: pcmpeqq %xmm1, %xmm0
; SSE42-NEXT: movmskpd %xmm0, %eax
-; SSE42-NEXT: xorl $3, %eax
-; SSE42-NEXT: cmpb $3, %al
+; SSE42-NEXT: testl %eax, %eax
; SSE42-NEXT: sete %al
; SSE42-NEXT: retq
;
; AVX1OR2-LABEL: bool_reduction_v2i64:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT: xorl $3, %eax
-; AVX1OR2-NEXT: cmpb $3, %al
+; AVX1OR2-NEXT: vtestpd %xmm0, %xmm0
; AVX1OR2-NEXT: sete %al
; AVX1OR2-NEXT: retq
;
@@ -1165,7 +1161,7 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-NEXT: pxor %xmm2, %xmm0
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
-; SSE2-NEXT: cmpb $15, %al
+; SSE2-NEXT: cmpl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@@ -1174,8 +1170,7 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE42-NEXT: pminud %xmm0, %xmm1
; SSE42-NEXT: pcmpeqd %xmm0, %xmm1
; SSE42-NEXT: movmskps %xmm1, %eax
-; SSE42-NEXT: xorl $15, %eax
-; SSE42-NEXT: cmpb $15, %al
+; SSE42-NEXT: testl %eax, %eax
; SSE42-NEXT: sete %al
; SSE42-NEXT: retq
;
@@ -1183,9 +1178,7 @@ define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpminud %xmm1, %xmm0, %xmm1
; AVX1OR2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
-; AVX1OR2-NEXT: xorl $15, %eax
-; AVX1OR2-NEXT: cmpb $15, %al
+; AVX1OR2-NEXT: vtestps %xmm0, %xmm0
; AVX1OR2-NEXT: sete %al
; AVX1OR2-NEXT: retq
;
@@ -1247,7 +1240,7 @@ define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SSE: # %bb.0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT: sete %al
; SSE-NEXT: retq
;
@@ -1255,7 +1248,7 @@ define i1 @bool_reduction_v16i8(<16 x i8> %x, <16 x i8> %y) {
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
-; AVX1OR2-NEXT: cmpw $-1, %ax
+; AVX1OR2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
; AVX1OR2-NEXT: sete %al
; AVX1OR2-NEXT: retq
;
@@ -1300,7 +1293,7 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
; SSE2-NEXT: por %xmm2, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
-; SSE2-NEXT: cmpb $15, %al
+; SSE2-NEXT: cmpl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@@ -1310,7 +1303,7 @@ define i1 @bool_reduction_v4i64(<4 x i64> %x, <4 x i64> %y) {
; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
; SSE42-NEXT: packssdw %xmm3, %xmm2
; SSE42-NEXT: movmskps %xmm2, %eax
-; SSE42-NEXT: cmpb $15, %al
+; SSE42-NEXT: cmpl $15, %eax
; SSE42-NEXT: sete %al
; SSE42-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 0c6faeec8344..8814cd592a8e 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -16,7 +16,7 @@ define i1 @trunc_v2i64_v2i1(<2 x i64>) {
; SSE2: # %bb.0:
; SSE2-NEXT: psllq $63, %xmm0
; SSE2-NEXT: movmskpd %xmm0, %eax
-; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: cmpl $3, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
@@ -151,7 +151,7 @@ define i1 @trunc_v4i64_v4i1(<4 x i64>) {
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
-; SSE2-NEXT: cmpb $15, %al
+; SSE2-NEXT: cmpl $15, %eax
; SSE2-NEXT: sete %al
; SSE2-NEXT: retq
;
More information about the llvm-commits
mailing list