[llvm] 4b130b8 - [X86][SSE] SimplifyDemandedVectorEltsForTargetNode - reduce vector width of X86ISD::BLENDI
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 11:29:45 PDT 2020
Author: Simon Pilgrim
Date: 2020-03-09T18:29:28Z
New Revision: 4b130b883d4ab6526ef464d54afd710d95fafa4f
URL: https://github.com/llvm/llvm-project/commit/4b130b883d4ab6526ef464d54afd710d95fafa4f
DIFF: https://github.com/llvm/llvm-project/commit/4b130b883d4ab6526ef464d54afd710d95fafa4f.diff
LOG: [X86][SSE] SimplifyDemandedVectorEltsForTargetNode - reduce vector width of X86ISD::BLENDI
If we don't need the upper subvector elements of the BLENDI node, then use a smaller vector size.
This causes a couple of minor regressions in insertelement-ones.ll, which are more examples of PR26018; given how cheap allones generation is, I don't consider that a showstopper, just an annoyance (and there are plenty of other poor codegen cases in that file).
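For illustration only, here is a hypothetical reduced case (not taken from the patch or its test files) of the kind of pattern this targets: a 256-bit blend whose upper subvector elements are never demanded, so the v16i16 blend can in principle be narrowed to v8i16 (a ymm vpblendw becoming an xmm vpblendw, as the vector-shuffle-256-v16.ll diffs below show at the asm level). Whether this exact IR reaches the new BLENDI path, rather than being simplified earlier, is an assumption.

define <8 x i16> @narrow_blend_low_half(<16 x i16> %a, <16 x i16> %b) {
  ; Blend of %a and %b that lowers to an X86ISD::BLENDI node.
  %blend = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; Only the low half of the blend result is demanded.
  %low = shufflevector <16 x i16> %blend, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %low
}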
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/insertelement-ones.ll
llvm/test/CodeGen/X86/pr31956.ll
llvm/test/CodeGen/X86/vector-fshl-256.ll
llvm/test/CodeGen/X86/vector-fshl-512.ll
llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/test/CodeGen/X86/vector-fshr-256.ll
llvm/test/CodeGen/X86/vector-fshr-512.ll
llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5c9313d58014..fb8156b9ac45 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36400,6 +36400,23 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
+ // Vector blend by immediate.
+ case X86ISD::BLENDI: {
+ SDLoc DL(Op);
+ MVT ExtVT = VT.getSimpleVT();
+ ExtVT = MVT::getVectorVT(ExtVT.getScalarType(),
+ ExtSizeInBits / ExtVT.getScalarSizeInBits());
+ SDValue Ext0 =
+ extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits);
+ SDValue Ext1 =
+ extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits);
+ SDValue ExtOp =
+ TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Op.getOperand(2));
+ SDValue UndefVec = TLO.DAG.getUNDEF(VT);
+ SDValue Insert =
+ insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
+ return TLO.CombineTo(Op, Insert);
+ }
}
}
diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll
index ab95e18faea1..9de1d8e37c6b 100644
--- a/llvm/test/CodeGen/X86/insertelement-ones.ll
+++ b/llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -290,18 +290,20 @@ define <16 x i16> @insert_v16i16_x12345x789ABCDEx(<16 x i16> %a) {
;
; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
-; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
+; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: insert_v16i16_x12345x789ABCDEx:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7],ymm1[8],ymm0[9,10,11,12,13],ymm1[14],ymm0[15]
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
-; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
+; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: insert_v16i16_x12345x789ABCDEx:
diff --git a/llvm/test/CodeGen/X86/pr31956.ll b/llvm/test/CodeGen/X86/pr31956.ll
index 0845051bfd4e..b8c8b9c12919 100644
--- a/llvm/test/CodeGen/X86/pr31956.ll
+++ b/llvm/test/CodeGen/X86/pr31956.ll
@@ -9,11 +9,10 @@ target triple = "x86_64-scei-ps4"
define <4 x float> @foo() {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7]
+; CHECK-NEXT: vmovaps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: vmovlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],mem[0,2]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
-; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
entry:
%V = load <2 x float>, <2 x float>* @G1, align 8
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 1c78d4e1a0cd..dee266a93b5f 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -1907,7 +1907,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
;
@@ -1917,7 +1917,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm2
; AVX512F-NEXT: vpor %ymm1, %ymm2, %ymm1
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: retq
;
@@ -1927,7 +1927,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm2
; AVX512VL-NEXT: vpor %ymm1, %ymm2, %ymm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT: retq
;
@@ -1940,7 +1940,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; AVX512BW-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
; AVX512BW-NEXT: vpor %ymm1, %ymm2, %ymm1
-; AVX512BW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512BW-NEXT: retq
;
@@ -1953,7 +1953,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>
; AVX512VBMI2-NEXT: vpsllvw %zmm2, %zmm0, %zmm2
; AVX512VBMI2-NEXT: vpor %ymm1, %ymm2, %ymm1
-; AVX512VBMI2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512VBMI2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VBMI2-NEXT: retq
;
@@ -1992,7 +1992,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; XOPAVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1
; XOPAVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm2
; XOPAVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; XOPAVX2-NEXT: retq
%res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index e45428d8fff5..40706f415b94 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -1078,12 +1078,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm3, %ymm3
; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm5
; AVX512F-NEXT: vpor %ymm3, %ymm5, %ymm3
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7],ymm2[8],ymm3[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm3
; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
@@ -1096,12 +1096,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm3, %ymm3
; AVX512VL-NEXT: vpmullw %ymm4, %ymm2, %ymm5
; AVX512VL-NEXT: vpor %ymm3, %ymm5, %ymm3
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7],ymm2[8],ymm3[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm3
; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 6a7865e51d2b..1c37b721b9c2 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -523,13 +523,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768>
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
@@ -541,13 +541,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768>
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index 963cc5444119..4a1c3092acab 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -1910,7 +1910,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
@@ -1920,7 +1920,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: retq
;
@@ -1930,7 +1930,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT: retq
;
@@ -1943,7 +1943,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = <u,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1>
; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpor %ymm2, %ymm0, %ymm0
-; AVX512BW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512BW-NEXT: retq
;
@@ -1956,7 +1956,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm3 = <u,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1>
; AVX512VBMI2-NEXT: vpsllvw %zmm3, %zmm0, %zmm0
; AVX512VBMI2-NEXT: vpor %ymm2, %ymm0, %ymm0
-; AVX512VBMI2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512VBMI2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512VBMI2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512VBMI2-NEXT: retq
;
@@ -1995,7 +1995,7 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin
; XOPAVX2-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
; XOPAVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0
; XOPAVX2-NEXT: vpor %ymm3, %ymm0, %ymm0
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; XOPAVX2-NEXT: retq
%res = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 6df29e86ce35..bbacb68ffa94 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -1066,12 +1066,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm3, %ymm5
; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpor %ymm5, %ymm2, %ymm2
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512F-NEXT: vpmulhuw %ymm4, %ymm1, %ymm3
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
@@ -1084,12 +1084,12 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm3, %ymm5
; AVX512VL-NEXT: vpmullw %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpor %ymm5, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm3[0],ymm2[1,2,3,4,5,6,7],ymm3[8],ymm2[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512VL-NEXT: vpmulhuw %ymm4, %ymm1, %ymm3
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512VL-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index d8d300f66c59..4862a88e4bf7 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -543,13 +543,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
; AVX512F-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512F-NEXT: vpor %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512F-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512F-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512F-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0
@@ -561,13 +561,13 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind {
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm4 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm1, %ymm1
; AVX512VL-NEXT: vpor %ymm3, %ymm1, %ymm1
; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512VL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpblendw {{.*#+}} xmm3 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
; AVX512VL-NEXT: vpmullw %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpor %ymm2, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 72537b87f523..4315189d4133 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -1184,9 +1184,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX2-LABEL: constant_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
;
@@ -1247,9 +1247,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; X32-AVX2-LABEL: constant_shift_v16i16:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpmulhw {{\.LCPI.*}}, %ymm0, %ymm1
-; X32-AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; X32-AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X32-AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
-; X32-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
+; X32-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X32-AVX2-NEXT: retl
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 24395c9169f0..2e7bc7a0207c 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -971,7 +971,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX2-LABEL: constant_shift_v16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
;
@@ -986,14 +986,14 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; XOPAVX2-LABEL: constant_shift_v16i16:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; XOPAVX2-NEXT: retq
;
; AVX512DQ-LABEL: constant_shift_v16i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512DQ-NEXT: retq
;
@@ -1008,7 +1008,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; AVX512DQVL-LABEL: constant_shift_v16i16:
; AVX512DQVL: # %bb.0:
; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
-; AVX512DQVL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512DQVL-NEXT: retq
;
@@ -1029,7 +1029,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
; X32-AVX2-LABEL: constant_shift_v16i16:
; X32-AVX2: # %bb.0:
; X32-AVX2-NEXT: vpmulhuw {{\.LCPI.*}}, %ymm0, %ymm1
-; X32-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; X32-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X32-AVX2-NEXT: retl
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
index 81ab84315ea8..78c33b2d1948 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -209,10 +209,10 @@ define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2>
; AVX512DQ-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3
-; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3,4,5,6,7],ymm1[8],ymm3[9,10,11,12,13,14,15]
+; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3,4,5,6,7]
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
; AVX512DQ-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2
-; AVX512DQ-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7],ymm0[8],ymm2[9,10,11,12,13,14,15]
+; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
index 0208bc3c4ff0..4cdddb4391e4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -440,7 +440,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-NEXT: retq
@@ -463,7 +463,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; XOPAVX2-NEXT: retq
@@ -1490,7 +1490,7 @@ define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_1
;
; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
@@ -1507,7 +1507,7 @@ define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_1
;
; XOPAVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
; XOPAVX2: # %bb.0:
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; XOPAVX2-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -7277,9 +7277,9 @@ define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
; AVX2-LABEL: PR34369:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
+; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,10,11,u,u,u,u,u,u,4,5]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7],ymm0[8,9,10],ymm2[11],ymm0[12,13,14],ymm2[15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6],xmm2[7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -7310,9 +7310,8 @@ define <16 x i16> @PR34369(<16 x i16> %vec, <16 x i16> %mask) {
; XOPAVX2-LABEL: PR34369:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; XOPAVX2-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,0,1,u,u,10,11,4,5,4,5,u,u,30,31,16,17,28,29,16,17,18,19,20,21,24,25,24,25]
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7],ymm0[8,9,10],ymm2[11],ymm0[12,13,14],ymm2[15]
+; XOPAVX2-NEXT: vpperm {{.*#+}} xmm2 = xmm0[0,1,2,3,4,5],xmm2[10,11],xmm0[8,9,10,11,12,13],xmm2[4,5]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT: vpcmpeqw %ymm2, %ymm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index e9fc09668367..1e1bf04258cf 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -916,7 +916,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
@@ -946,7 +946,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; XOPAVX2-NEXT: retq
@@ -968,7 +968,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
@@ -998,7 +998,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; XOPAVX2: # %bb.0:
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
+; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; XOPAVX2-NEXT: retq