[llvm] d8ad7cc - [DAGCombiner][X86] Improve narrowExtractedVectorLoad to handle cases where the element size isn't byte sized by the subvector is.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 08:42:01 PST 2020


Author: Craig Topper
Date: 2020-03-03T08:41:31-08:00
New Revision: d8ad7cc0885f32104a7cd83c77191aec15fd684f

URL: https://github.com/llvm/llvm-project/commit/d8ad7cc0885f32104a7cd83c77191aec15fd684f
DIFF: https://github.com/llvm/llvm-project/commit/d8ad7cc0885f32104a7cd83c77191aec15fd684f.diff

LOG: [DAGCombiner][X86] Improve narrowExtractedVectorLoad to handle cases where the element size isn't byte sized by the subvector is.

Summary:
Follow up from D75377. If the subvector is byte sized and the
index is aligned to the subvector size, we can shrink the load.

Reviewers: spatel, RKSimon

Reviewed By: RKSimon

Subscribers: dbabokin, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75434

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
    llvm/test/CodeGen/X86/avx512-mask-op.ll
    llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3b0b84e705f8..0265a6f18f33 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18662,10 +18662,23 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   // Allow targets to opt-out.
   EVT VT = Extract->getValueType(0);
 
-  // Only handle byte sized scalars otherwise the offset is incorrect.
-  // FIXME: We might be able to do better if the VT is byte sized and the index
-  // is aligned.
-  if (!VT.getScalarType().isByteSized())
+  // We can only create byte sized loads.
+  if (!VT.isByteSized())
+    return SDValue();
+
+  unsigned Index = ExtIdx->getZExtValue();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // If the index is a multiple of the extract element count, we can offset the
+  // address by the store size multiplied by the subvector index. Otherwise if
+  // the scalar type is byte sized, we can just use the index multiplied by
+  // the element size in bytes as the offset.
+  unsigned Offset;
+  if (Index % NumElts == 0)
+    Offset = (Index / NumElts) * VT.getStoreSize();
+  else if (VT.getScalarType().isByteSized())
+    Offset = Index * VT.getScalarType().getStoreSize();
+  else
     return SDValue();
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -18675,8 +18688,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   // The narrow load will be offset from the base address of the old load if
   // we are extracting from something besides index 0 (little-endian).
   SDLoc DL(Extract);
-  SDValue BaseAddr = Ld->getOperand(1);
-  unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+  SDValue BaseAddr = Ld->getBasePtr();
 
   // TODO: Use "BaseIndexOffset" to make this more effective.
   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);

diff  --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
index b2728400d256..b30a81932d89 100644
--- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll
@@ -237,8 +237,7 @@ define void @load_v32i1_broadcast_16_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
 define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
 ; AVX512-LABEL: load_v32i1_broadcast_16_v8i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovd (%rdi), %k0
-; AVX512-NEXT:    kshiftrd $16, %k0, %k0
+; AVX512-NEXT:    kmovb 2(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm2
 ; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
 ; AVX512-NEXT:    vpmovd2m %ymm2, %k1
@@ -249,8 +248,7 @@ define void @load_v32i1_broadcast_16_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
 ;
 ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 2(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
@@ -326,8 +324,7 @@ define void @load_v32i1_broadcast_31_v4i1(<32 x i1>* %a0,<4 x float> %a1,<4 x fl
 define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
 ; AVX512-LABEL: load_v32i1_broadcast_31_v8i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovd (%rdi), %k0
-; AVX512-NEXT:    kshiftrd $24, %k0, %k0
+; AVX512-NEXT:    kmovb 3(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm2
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
 ; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
@@ -339,8 +336,8 @@ define void @load_v32i1_broadcast_31_v8i1(<32 x i1>* %a0,<8 x float> %a1,<8 x fl
 ;
 ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
+; AVX512NOTDQ-NEXT:    movzbl 3(%rdi), %eax
+; AVX512NOTDQ-NEXT:    kmovd %eax, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
@@ -417,8 +414,7 @@ define void @load_v64i1_broadcast_32_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
 define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
 ; AVX512-LABEL: load_v64i1_broadcast_32_v8i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $32, %k0, %k0
+; AVX512-NEXT:    kmovb 4(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm2
 ; AVX512-NEXT:    vpbroadcastd %xmm2, %ymm2
 ; AVX512-NEXT:    vpmovd2m %ymm2, %k1
@@ -429,8 +425,7 @@ define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 4(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %ymm2
@@ -448,8 +443,7 @@ define void @load_v64i1_broadcast_32_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
 define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
 ; AVX512-LABEL: load_v64i1_broadcast_32_v16i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $32, %k0, %k0
+; AVX512-NEXT:    kmovw 4(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %zmm2
 ; AVX512-NEXT:    vpbroadcastd %xmm2, %zmm2
 ; AVX512-NEXT:    vpmovd2m %zmm2, %k1
@@ -460,8 +454,7 @@ define void @load_v64i1_broadcast_32_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 4(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm2, %zmm2
 ; AVX512NOTDQ-NEXT:    vptestmd %zmm2, %zmm2, %k1
@@ -536,8 +529,7 @@ define void @load_v64i1_broadcast_63_v4i1(<64 x i1>* %a0,<4 x float> %a1,<4 x fl
 define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x float> %a2,<8 x float>* %a3) {
 ; AVX512-LABEL: load_v64i1_broadcast_63_v8i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $56, %k0, %k0
+; AVX512-NEXT:    kmovb 7(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm2
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
 ; AVX512-NEXT:    vpermd %ymm2, %ymm3, %ymm2
@@ -549,8 +541,8 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
+; AVX512NOTDQ-NEXT:    movzbl 7(%rdi), %eax
+; AVX512NOTDQ-NEXT:    kmovd %eax, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm2, %ymm2 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7]
@@ -569,8 +561,7 @@ define void @load_v64i1_broadcast_63_v8i1(<64 x i1>* %a0,<8 x float> %a1,<8 x fl
 define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x float> %a2,<16 x float>* %a3) {
 ; AVX512-LABEL: load_v64i1_broadcast_63_v16i1:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $48, %k0, %k0
+; AVX512-NEXT:    kmovw 6(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %zmm2
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512-NEXT:    vpermd %zmm2, %zmm3, %zmm2
@@ -582,8 +573,7 @@ define void @load_v64i1_broadcast_63_v16i1(<64 x i1>* %a0,<16 x float> %a1,<16 x
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $48, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 6(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} zmm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512NOTDQ-NEXT:    vpermd %zmm2, %zmm3, %zmm2
@@ -1037,8 +1027,7 @@ define void @load_v32i1_broadcast_16_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
 define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
 ; AVX512-LABEL: load_v32i1_broadcast_16_v8i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovd (%rdi), %k0
-; AVX512-NEXT:    kshiftrd $16, %k0, %k0
+; AVX512-NEXT:    kmovb 2(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm0
 ; AVX512-NEXT:    vpbroadcastd %xmm0, %ymm0
 ; AVX512-NEXT:    vpmovd2m %ymm0, %k0
@@ -1048,8 +1037,7 @@ define void @load_v32i1_broadcast_16_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
 ;
 ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v8i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrd $16, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 2(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %ymm0
@@ -1140,8 +1128,7 @@ define void @load_v32i1_broadcast_31_v4i1_store(<32 x i1>* %a0,<4 x i1>* %a1) {
 define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
 ; AVX512-LABEL: load_v32i1_broadcast_31_v8i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovd (%rdi), %k0
-; AVX512-NEXT:    kshiftrd $24, %k0, %k0
+; AVX512-NEXT:    kmovb 3(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm0
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
 ; AVX512-NEXT:    vpermd %ymm0, %ymm1, %ymm0
@@ -1152,8 +1139,8 @@ define void @load_v32i1_broadcast_31_v8i1_store(<32 x i1>* %a0,<8 x i1>* %a1) {
 ;
 ; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v8i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovd (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrd $24, %k0, %k1
+; AVX512NOTDQ-NEXT:    movzbl 3(%rdi), %eax
+; AVX512NOTDQ-NEXT:    kmovd %eax, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
@@ -1245,8 +1232,7 @@ define void @load_v64i1_broadcast_32_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
 define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
 ; AVX512-LABEL: load_v64i1_broadcast_32_v8i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $32, %k0, %k0
+; AVX512-NEXT:    kmovb 4(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm0
 ; AVX512-NEXT:    vpbroadcastd %xmm0, %ymm0
 ; AVX512-NEXT:    vpmovd2m %ymm0, %k0
@@ -1256,8 +1242,7 @@ define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v8i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 4(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %ymm0
@@ -1274,8 +1259,7 @@ define void @load_v64i1_broadcast_32_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
 define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
 ; AVX512-LABEL: load_v64i1_broadcast_32_v16i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $32, %k0, %k0
+; AVX512-NEXT:    kmovw 4(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512-NEXT:    vpbroadcastd %xmm0, %zmm0
 ; AVX512-NEXT:    vpmovd2m %zmm0, %k0
@@ -1285,8 +1269,7 @@ define void @load_v64i1_broadcast_32_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1)
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v16i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $32, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 4(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd %xmm0, %zmm0
 ; AVX512NOTDQ-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -1375,8 +1358,7 @@ define void @load_v64i1_broadcast_63_v4i1_store(<64 x i1>* %a0,<4 x i1>* %a1) {
 define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
 ; AVX512-LABEL: load_v64i1_broadcast_63_v8i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $56, %k0, %k0
+; AVX512-NEXT:    kmovb 7(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %ymm0
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
 ; AVX512-NEXT:    vpermd %ymm0, %ymm1, %ymm0
@@ -1387,8 +1369,8 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v8i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $56, %k0, %k1
+; AVX512NOTDQ-NEXT:    movzbl 7(%rdi), %eax
+; AVX512NOTDQ-NEXT:    kmovd %eax, %k1
 ; AVX512NOTDQ-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX512NOTDQ-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [7,7,7,7,7,7,7,7]
@@ -1406,8 +1388,7 @@ define void @load_v64i1_broadcast_63_v8i1_store(<64 x i1>* %a0,<8 x i1>* %a1) {
 define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1) {
 ; AVX512-LABEL: load_v64i1_broadcast_63_v16i1_store:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    kmovq (%rdi), %k0
-; AVX512-NEXT:    kshiftrq $48, %k0, %k0
+; AVX512-NEXT:    kmovw 6(%rdi), %k0
 ; AVX512-NEXT:    vpmovm2d %k0, %zmm0
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512-NEXT:    vpermd %zmm0, %zmm1, %zmm0
@@ -1418,8 +1399,7 @@ define void @load_v64i1_broadcast_63_v16i1_store(<64 x i1>* %a0,<16 x i1>* %a1)
 ;
 ; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v16i1_store:
 ; AVX512NOTDQ:       # %bb.0:
-; AVX512NOTDQ-NEXT:    kmovq (%rdi), %k0
-; AVX512NOTDQ-NEXT:    kshiftrq $48, %k0, %k1
+; AVX512NOTDQ-NEXT:    kmovw 6(%rdi), %k1
 ; AVX512NOTDQ-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; AVX512NOTDQ-NEXT:    vpbroadcastd {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; AVX512NOTDQ-NEXT:    vpermd %zmm0, %zmm1, %zmm0

diff  --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 35ecdae61a30..b9f9ffba55d8 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -278,10 +278,7 @@ define i8 @shuf_test1(i16 %v) nounwind {
 ;
 ; X86-LABEL: shuf_test1:
 ; X86:       ## %bb.0:
-; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0
-; X86-NEXT:    kshiftrw $8, %k0, %k0
-; X86-NEXT:    kmovd %k0, %eax
-; X86-NEXT:    ## kill: def $al killed $al killed $eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
    %v1 = bitcast i16 %v to <16 x i1>
    %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

diff  --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index 22081b43720f..0ca1f7be8852 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -7,8 +7,8 @@ declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
 define i32 at test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
 ; X86-LABEL: test_int_x86_avx512_kunpck_wd:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
-; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
 ; X86-NEXT:    kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -29,10 +29,8 @@ declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
 define i64 at test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
 ; X86-LABEL: test_int_x86_avx512_kunpck_qd:
 ; X86:       # %bb.0:
-; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04]
-; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
-; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
-; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
 ; X86-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_kunpck_qd:


        


More information about the llvm-commits mailing list