[llvm] 85cf2e8 - [X86] combineConcatVectorOps - concatenation of constant subvectors is free.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 29 06:21:32 PDT 2023
Author: Simon Pilgrim
Date: 2023-08-29T14:15:24+01:00
New Revision: 85cf2e8286ddcd77368879584363d46e42899a43
URL: https://github.com/llvm/llvm-project/commit/85cf2e8286ddcd77368879584363d46e42899a43
DIFF: https://github.com/llvm/llvm-project/commit/85cf2e8286ddcd77368879584363d46e42899a43.diff
LOG: [X86] combineConcatVectorOps - concatenation of constant subvectors is free.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
llvm/test/CodeGen/X86/vector-trunc-usat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 38b48cdfc040f9..9a80bbf5a717ad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54378,15 +54378,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
};
auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
+ bool AllConstants = true;
+ bool AllSubVectors = true;
for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
SDValue Sub = SubOps[I].getOperand(Op);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (Sub.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- Sub.getOperand(0).getValueType() != VT ||
- Sub.getConstantOperandAPInt(1) != (I * NumSubElts))
- return false;
- }
- return true;
+ SDValue BC = peekThroughBitcasts(Sub);
+ AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
+ AllSubVectors &= Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Sub.getOperand(0).getValueType() == VT &&
+ Sub.getConstantOperandAPInt(1) == (I * NumSubElts);
+ }
+ return AllConstants || AllSubVectors;
};
switch (Op0.getOpcode()) {
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
index d4ecb53b839970..e0eca731b144d0 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll
@@ -531,28 +531,28 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
;
; AVX1-LABEL: truncstore_v8i64_v8i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
-; AVX1-NEXT: # xmm6 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [65535,65535]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vblendvpd %xmm4, %xmm1, %xmm7, %xmm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpxor %xmm3, %xmm6, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm7
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm8
+; AVX1-NEXT: vpcmpgtq %xmm8, %xmm5, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
+; AVX1-NEXT: vpxor %xmm3, %xmm9, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [65535,65535]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm3, %xmm9, %xmm5, %xmm3
+; AVX1-NEXT: vblendvpd %xmm8, %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
@@ -1003,28 +1003,28 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, ptr %p, <8 x i32> %mask) {
;
; AVX1-LABEL: truncstore_v8i64_v8i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
-; AVX1-NEXT: # xmm6 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vblendvpd %xmm4, %xmm1, %xmm7, %xmm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpxor %xmm3, %xmm6, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm5, %xmm7
+; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm8
+; AVX1-NEXT: vpcmpgtq %xmm8, %xmm5, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm9
+; AVX1-NEXT: vpxor %xmm3, %xmm9, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [255,255]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm3, %xmm9, %xmm5, %xmm3
+; AVX1-NEXT: vblendvpd %xmm8, %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vblendvpd %xmm7, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
@@ -1578,19 +1578,19 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX1-LABEL: truncstore_v4i64_v4i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
-; AVX1-NEXT: # xmm6 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [65535,65535]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm7, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpxor %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [65535,65535]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm3, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
@@ -1860,19 +1860,19 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, ptr %p, <4 x i32> %mask) {
; AVX1-LABEL: truncstore_v4i64_v4i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
-; AVX1-NEXT: # xmm6 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm7, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpxor %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [255,255]
+; AVX1-NEXT: # xmm5 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm3, %xmm6, %xmm5, %xmm3
+; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm5, %xmm0
; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
index a0909e370097d2..1719e2588db9ef 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll
@@ -878,19 +878,19 @@ define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) {
;
; AVX1-LABEL: trunc_usat_v4i64_v4i16:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm2 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854841343,9223372036854841343]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [65535,65535]
-; AVX1-NEXT: # xmm5 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm1 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [65535,65535]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
@@ -1012,19 +1012,19 @@ define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, ptr%p1) {
;
; AVX1-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm2 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854841343,9223372036854841343]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [65535,65535]
-; AVX1-NEXT: # xmm5 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm1 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854841343,9223372036854841343]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [65535,65535]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rdi)
@@ -1209,25 +1209,25 @@ define <8 x i16> @trunc_usat_v8i64_v8i16(ptr %p0) {
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854841343,9223372036854841343]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [65535,65535]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm6, %xmm1
-; AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm7, %xmm2
-; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8
+; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [65535,65535]
+; AVX1-NEXT: # xmm6 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v8i64_v8i16:
@@ -1998,19 +1998,19 @@ define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) {
;
; AVX1-LABEL: trunc_usat_v4i64_v4i8:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm2 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854776063,9223372036854776063]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [255,255]
-; AVX1-NEXT: # xmm5 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm1 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [255,255]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
@@ -2134,19 +2134,19 @@ define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, ptr%p1) {
;
; AVX1-LABEL: trunc_usat_v4i64_v4i8_store:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: # xmm2 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854776063,9223372036854776063]
-; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vmovddup {{.*#+}} xmm5 = [255,255]
-; AVX1-NEXT: # xmm5 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm3, %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: # xmm1 = mem[0,0]
+; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854776063,9223372036854776063]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpxor %xmm1, %xmm4, %xmm1
+; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [255,255]
+; AVX1-NEXT: # xmm3 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm1, %xmm4, %xmm3, %xmm1
+; AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
@@ -2331,25 +2331,25 @@ define <8 x i8> @trunc_usat_v8i64_v8i8(ptr %p0) {
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm6, %xmm1
-; AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm7, %xmm2
-; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8
+; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [255,255]
+; AVX1-NEXT: # xmm6 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -2516,25 +2516,25 @@ define void @trunc_usat_v8i64_v8i8_store(ptr %p0, ptr%p1) {
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm6, %xmm1
-; AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm7, %xmm2
-; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8
+; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [255,255]
+; AVX1-NEXT: # xmm6 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, (%rsi)
; AVX1-NEXT: retq
@@ -2796,50 +2796,51 @@ define <16 x i8> @trunc_usat_v16i64_v16i8(ptr %p0) {
; AVX1-LABEL: trunc_usat_v16i64_v16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
; AVX1-NEXT: # xmm4 = mem[0,0]
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm5
+; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [9223372036854776063,9223372036854776063]
; AVX1-NEXT: # xmm6 = mem[0,0]
; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vmovddup {{.*#+}} xmm7 = [255,255]
-; AVX1-NEXT: # xmm7 = mem[0,0]
-; AVX1-NEXT: vblendvpd %xmm5, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm6, %xmm5
-; AVX1-NEXT: vblendvpd %xmm5, %xmm0, %xmm7, %xmm0
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm1
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm6, %xmm1
-; AVX1-NEXT: vblendvpd %xmm1, %xmm3, %xmm7, %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm7, %xmm2
-; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa 80(%rdi), %xmm1
-; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm6, %xmm2
-; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm7, %xmm1
-; AVX1-NEXT: vmovdqa 64(%rdi), %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm7, %xmm2
-; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vmovdqa 112(%rdi), %xmm2
-; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vblendvpd %xmm3, %xmm2, %xmm7, %xmm2
-; AVX1-NEXT: vmovdqa 96(%rdi), %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm6, %xmm7
+; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm8
+; AVX1-NEXT: vpcmpgtq %xmm8, %xmm6, %xmm8
+; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm9
+; AVX1-NEXT: vpcmpgtq %xmm9, %xmm6, %xmm9
+; AVX1-NEXT: vmovdqa 64(%rdi), %xmm10
+; AVX1-NEXT: vpxor %xmm4, %xmm10, %xmm11
+; AVX1-NEXT: vpcmpgtq %xmm11, %xmm6, %xmm11
+; AVX1-NEXT: vmovdqa 80(%rdi), %xmm12
+; AVX1-NEXT: vpxor %xmm4, %xmm12, %xmm13
+; AVX1-NEXT: vpcmpgtq %xmm13, %xmm6, %xmm13
+; AVX1-NEXT: vmovdqa 96(%rdi), %xmm14
+; AVX1-NEXT: vpxor %xmm4, %xmm14, %xmm15
+; AVX1-NEXT: vpcmpgtq %xmm15, %xmm6, %xmm15
+; AVX1-NEXT: vmovdqa 112(%rdi), %xmm0
+; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm7, %xmm3
-; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vmovddup {{.*#+}} xmm6 = [255,255]
+; AVX1-NEXT: # xmm6 = mem[0,0]
+; AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vblendvpd %xmm15, %xmm14, %xmm6, %xmm4
+; AVX1-NEXT: vblendvpd %xmm13, %xmm12, %xmm6, %xmm12
+; AVX1-NEXT: vblendvpd %xmm11, %xmm10, %xmm6, %xmm10
+; AVX1-NEXT: vblendvpd %xmm9, %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vblendvpd %xmm8, %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vblendvpd %xmm7, %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vblendvpd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6, %xmm5 # 16-byte Folded Reload
+; AVX1-NEXT: vpackusdw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpackusdw %xmm12, %xmm10, %xmm4
+; AVX1-NEXT: vpackusdw %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpackusdw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: trunc_usat_v16i64_v16i8:
More information about the llvm-commits mailing list