[llvm] 9fde88c - [X86][AVX] splitIntVSETCC - handle separate (canonicalized) SETCC operands
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 25 03:19:14 PDT 2021
Author: Simon Pilgrim
Date: 2021-03-25T10:18:44Z
New Revision: 9fde88c3e21cc552af5fbb0b016fe186bad5c0f5
URL: https://github.com/llvm/llvm-project/commit/9fde88c3e21cc552af5fbb0b016fe186bad5c0f5
DIFF: https://github.com/llvm/llvm-project/commit/9fde88c3e21cc552af5fbb0b016fe186bad5c0f5.diff
LOG: [X86][AVX] splitIntVSETCC - handle separate (canonicalized) SETCC operands
LowerVSETCC calls splitIntVSETCC after canonicalizing certain patterns, in particular (X & CPow2) != 0 -> (X & CPow2) == CPow2.
Unfortunately, when we split for the AVX1/non-AVX512BW cases, these canonicalizations are lost: the split is invoked with the original SETCC node, and when the split nodes are later lowered in LowerVSETCC the patterns are hidden behind extract_subvector etc. Passing the already-canonicalized operands to the split instead retains the optimizations.
Differential Revision: https://reviews.llvm.org/D99256
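Why the canonicalization is safe, in scalar terms: when CPow2 has a single set bit, X & CPow2 can only evaluate to 0 or to CPow2, so testing the result with != 0 is the same as testing it with == CPow2. A minimal standalone C++ sketch (illustrative helper names, not LLVM code) that exhaustively verifies the equivalence:

    #include <cassert>
    #include <cstdint>

    // When c is a power of two, (x & c) is either 0 or c, so these two
    // predicates agree on every input.
    static bool maskTestNe0(uint64_t x, uint64_t c) { return (x & c) != 0; }
    static bool maskTestEqC(uint64_t x, uint64_t c) { return (x & c) == c; }

    int main() {
      for (uint64_t x = 0; x < 256; ++x)
        for (unsigned b = 0; b < 8; ++b) {
          uint64_t c = uint64_t(1) << b; // power-of-two mask, like CPow2
          assert(maskTestNe0(x, c) == maskTestEqC(x, c));
        }
      return 0;
    }

The == form matters for AVX1 because each 128-bit half can then be lowered as a single PCMPEQ against the constant, instead of a PCMPEQ against zero followed by a NOT of the mask.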
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
llvm/test/CodeGen/X86/vector-sext.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 350a1b73c13ea..afbdb4f94cd2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22713,23 +22713,21 @@ static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
/// Break a 256-bit integer VSETCC into two new 128-bit ones and then
/// concatenate the result back.
-static SDValue splitIntVSETCC(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
+static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS,
+ ISD::CondCode Cond, SelectionDAG &DAG,
+ const SDLoc &dl) {
+ assert(VT.isInteger() && VT == LHS.getValueType() &&
+ VT == RHS.getValueType() && "Unsupported VTs!");
- assert(Op.getOpcode() == ISD::SETCC && "Unsupported operation");
- assert(Op.getOperand(0).getValueType().isInteger() &&
- VT == Op.getOperand(0).getValueType() && "Unsupported VTs!");
-
- SDLoc dl(Op);
- SDValue CC = Op.getOperand(2);
+ SDValue CC = DAG.getCondCode(Cond);
// Extract the LHS Lo/Hi vectors
SDValue LHS1, LHS2;
- std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
+ std::tie(LHS1, LHS2) = splitVector(LHS, DAG, dl);
// Extract the RHS Lo/Hi vectors
SDValue RHS1, RHS2;
- std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
+ std::tie(RHS1, RHS2) = splitVector(RHS, DAG, dl);
// Issue the operation on the smaller types and concatenate the result back
EVT LoVT, HiVT;
@@ -23079,11 +23077,11 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
- return splitIntVSETCC(Op, DAG);
+ return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
if (VT == MVT::v32i16 || VT == MVT::v64i8) {
assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!");
- return splitIntVSETCC(Op, DAG);
+ return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
}
// If this is a SETNE against the signed minimum value, change it to SETGT.
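The test updates below show the effect on AVX1 codegen: each 128-bit half previously compared against zero and then inverted the mask (vpcmpeq plus vpxor with all-ones, tying up extra registers); with the canonicalized operands each half is a single vpcmpeq against the power-of-two constants. A hand-written intrinsics sketch of the two lowerings for one <4 x i32> half (constants and function names are illustrative; the input is assumed to already be X & {1,2,4,8} per lane):

    #include <immintrin.h>

    // Old lowering of one half: (masked != 0) as NOT(masked == 0).
    static __m128i half_before(__m128i masked) {
      __m128i eq0 = _mm_cmpeq_epi32(masked, _mm_setzero_si128());
      return _mm_xor_si128(eq0, _mm_set1_epi32(-1)); // invert the lane mask
    }

    // New lowering of one half: a single compare, (masked == CPow2).
    static __m128i half_after(__m128i masked) {
      const __m128i pow2 = _mm_setr_epi32(1, 2, 4, 8);
      return _mm_cmpeq_epi32(masked, pow2);
    }

Both return identical lane masks for any input whose lanes are each 0 or the matching power of two, which is exactly what the preceding vandps guarantees.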
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 64d12cc190d96..841782708cebd 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -208,14 +208,10 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
@@ -258,14 +254,10 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
@@ -310,14 +302,10 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
@@ -365,12 +353,10 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -435,21 +421,15 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
@@ -502,21 +482,15 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
@@ -572,12 +546,10 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
+; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
@@ -585,9 +557,7 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
@@ -650,24 +620,18 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index 0d04e0a214661..7c7db1678e7be 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -264,16 +264,12 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
@@ -327,16 +323,12 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
@@ -392,16 +384,12 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
@@ -464,17 +452,15 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -558,25 +544,19 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm1, %xmm2
+; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqq {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
@@ -643,25 +623,19 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm2
+; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm1, %xmm2
+; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
@@ -730,14 +704,12 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
+; AVX1-NEXT: vpcmpeqw %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
@@ -745,11 +717,9 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) {
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
@@ -839,33 +809,27 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
-; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
+; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
index 65bf43ff2ed9e..b4fb48ade63bb 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll
@@ -205,17 +205,15 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 212b552254722..7647017127491 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -2333,14 +2333,10 @@ define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) {
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_sext_8i1_to_8i32:
@@ -2558,14 +2554,10 @@ define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) {
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_sext_16i1_to_16i16:
@@ -2636,12 +2628,10 @@ define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone {
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745]
+; AVX1-NEXT: # xmm2 = mem[0,0]
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;