[llvm] 0696a53 - Revert "[DAG] Support store merging of vector constant stores"

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 10 07:58:09 PDT 2023


Author: Philip Reames
Date: 2023-08-10T07:58:00-07:00
New Revision: 0696a531c201327988cbc2fc5320c61aa3a890bf

URL: https://github.com/llvm/llvm-project/commit/0696a531c201327988cbc2fc5320c61aa3a890bf
DIFF: https://github.com/llvm/llvm-project/commit/0696a531c201327988cbc2fc5320c61aa3a890bf.diff

LOG: Revert "[DAG] Support store merging of vector constant stores"

This reverts commit 660b740e4b3c4b23dfba36940ae0fe2ad41bfedf. A crash was reported in the review thread post-commit; reverting while investigating.
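
For reference, the reverted change taught the DAGCombiner store-merging code to treat stores of constant BUILD_VECTORs as constant stores, so adjacent stores of constant vectors could be combined into a single wider store. The removed X86 test merge_vec_stores_of_zero (see the diff below) illustrates the pattern, roughly:

  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
  store <4 x i32> zeroinitializer, <4 x i32>* %idx0, align 16
  store <4 x i32> zeroinitializer, <4 x i32>* %idx1, align 16

With the patch, those two 16-byte zero stores were merged into one 32-byte vmovups of %ymm0 at 48(%rdi); after this revert they are emitted as two separate 16-byte stores again, as the updated merge_vec_stores_of_constants test shows.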

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
    llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
    llvm/test/CodeGen/X86/avx512-mask-op.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2b2f5c8e29ead5..374730ba15ee3e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -698,11 +698,6 @@ namespace {
       case ISD::Constant:
       case ISD::ConstantFP:
         return StoreSource::Constant;
-      case ISD::BUILD_VECTOR:
-        if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
-            ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
-          return StoreSource::Constant;
-        return StoreSource::Unknown;
       case ISD::EXTRACT_VECTOR_ELT:
       case ISD::EXTRACT_SUBVECTOR:
         return StoreSource::Extract;
@@ -19500,10 +19495,6 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
         // If fp truncation is necessary give up for now.
         if (MemVT.getSizeInBits() != ElementSizeBits)
           return false;
-      } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
-                 ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
-        // Not yet handled
-        return false;
       } else {
         llvm_unreachable("Invalid constant element type");
       }
@@ -19634,7 +19625,7 @@ void DAGCombiner::getStoreMergeCandidates(
     case StoreSource::Constant:
       if (NoTypeMatch)
         return false;
-      if (getStoreSource(OtherBC) != StoreSource::Constant)
+      if (!isIntOrFPConstant(OtherBC))
         return false;
       break;
     case StoreSource::Extract:
@@ -19856,8 +19847,6 @@ bool DAGCombiner::tryStoreMergeOfConstants(
         IsElementZero = C->isZero();
       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
         IsElementZero = C->getConstantFPValue()->isNullValue();
-      else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
-        IsElementZero = true;
       if (IsElementZero) {
         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
           FirstZeroAfterNonZero = i;

diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
index 4730c2755acdba..742fead8a81d47 100644
--- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll
@@ -544,31 +544,53 @@ define void @bzero_32(ptr %a) nounwind {
 define void @bzero_64(ptr %a) nounwind {
 ; RV32-LABEL: bzero_64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 64
-; RV32-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV32-NEXT:    addi a1, a0, 48
+; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
+; RV32-NEXT:    vse8.v v8, (a1)
+; RV32-NEXT:    addi a1, a0, 32
+; RV32-NEXT:    vse8.v v8, (a1)
+; RV32-NEXT:    addi a1, a0, 16
+; RV32-NEXT:    vse8.v v8, (a1)
 ; RV32-NEXT:    vse8.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: bzero_64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 64
-; RV64-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; RV64-NEXT:    addi a1, a0, 48
+; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
+; RV64-NEXT:    vse8.v v8, (a1)
+; RV64-NEXT:    addi a1, a0, 32
+; RV64-NEXT:    vse8.v v8, (a1)
+; RV64-NEXT:    addi a1, a0, 16
+; RV64-NEXT:    vse8.v v8, (a1)
 ; RV64-NEXT:    vse8.v v8, (a0)
 ; RV64-NEXT:    ret
 ;
 ; RV32-FAST-LABEL: bzero_64:
 ; RV32-FAST:       # %bb.0:
-; RV32-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-FAST-NEXT:    addi a1, a0, 48
+; RV32-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-FAST-NEXT:    vmv.v.i v8, 0
+; RV32-FAST-NEXT:    vse64.v v8, (a1)
+; RV32-FAST-NEXT:    addi a1, a0, 32
+; RV32-FAST-NEXT:    vse64.v v8, (a1)
+; RV32-FAST-NEXT:    addi a1, a0, 16
+; RV32-FAST-NEXT:    vse64.v v8, (a1)
 ; RV32-FAST-NEXT:    vse64.v v8, (a0)
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: bzero_64:
 ; RV64-FAST:       # %bb.0:
-; RV64-FAST-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-FAST-NEXT:    addi a1, a0, 48
+; RV64-FAST-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-FAST-NEXT:    vmv.v.i v8, 0
+; RV64-FAST-NEXT:    vse64.v v8, (a1)
+; RV64-FAST-NEXT:    addi a1, a0, 32
+; RV64-FAST-NEXT:    vse64.v v8, (a1)
+; RV64-FAST-NEXT:    addi a1, a0, 16
+; RV64-FAST-NEXT:    vse64.v v8, (a1)
 ; RV64-FAST-NEXT:    vse64.v v8, (a0)
 ; RV64-FAST-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
@@ -664,15 +686,27 @@ define void @aligned_bzero_32(ptr %a) nounwind {
 define void @aligned_bzero_64(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_64:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT:    addi a1, a0, 48
+; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 32
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 16
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_64:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT:    addi a1, a0, 48
+; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 32
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 16
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
@@ -683,16 +717,28 @@ define void @aligned_bzero_66(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_66:
 ; RV32-BOTH:       # %bb.0:
 ; RV32-BOTH-NEXT:    sh zero, 64(a0)
-; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV32-BOTH-NEXT:    addi a1, a0, 48
+; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 32
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 16
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_66:
 ; RV64-BOTH:       # %bb.0:
 ; RV64-BOTH-NEXT:    sh zero, 64(a0)
-; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-BOTH-NEXT:    addi a1, a0, 48
+; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 32
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 16
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 66, i1 0)
@@ -708,8 +754,12 @@ define void @aligned_bzero_96(ptr %a) nounwind {
 ; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    addi a1, a0, 64
 ; RV32-BOTH-NEXT:    vse64.v v8, (a1)
-; RV32-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV32-BOTH-NEXT:    vmv.v.i v8, 0
+; RV32-BOTH-NEXT:    addi a1, a0, 48
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 32
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 16
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
@@ -721,8 +771,12 @@ define void @aligned_bzero_96(ptr %a) nounwind {
 ; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    addi a1, a0, 64
 ; RV64-BOTH-NEXT:    vse64.v v8, (a1)
-; RV64-BOTH-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-BOTH-NEXT:    vmv.v.i v8, 0
+; RV64-BOTH-NEXT:    addi a1, a0, 48
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 32
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 16
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 96, i1 0)
@@ -732,15 +786,43 @@ define void @aligned_bzero_96(ptr %a) nounwind {
 define void @aligned_bzero_128(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_128:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-BOTH-NEXT:    addi a1, a0, 112
+; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 96
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 80
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 64
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 48
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 32
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 16
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_128:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-BOTH-NEXT:    addi a1, a0, 112
+; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 96
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 80
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 64
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 48
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 32
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 16
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 128, i1 0)
@@ -750,19 +832,75 @@ define void @aligned_bzero_128(ptr %a) nounwind {
 define void @aligned_bzero_256(ptr %a) nounwind {
 ; RV32-BOTH-LABEL: aligned_bzero_256:
 ; RV32-BOTH:       # %bb.0:
-; RV32-BOTH-NEXT:    addi a1, a0, 128
-; RV32-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-BOTH-NEXT:    addi a1, a0, 240
+; RV32-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 224
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 208
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 192
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 176
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 160
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 144
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 128
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 112
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 96
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 80
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 64
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 48
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 32
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
+; RV32-BOTH-NEXT:    addi a1, a0, 16
+; RV32-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV32-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV32-BOTH-NEXT:    ret
 ;
 ; RV64-BOTH-LABEL: aligned_bzero_256:
 ; RV64-BOTH:       # %bb.0:
-; RV64-BOTH-NEXT:    addi a1, a0, 128
-; RV64-BOTH-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-BOTH-NEXT:    addi a1, a0, 240
+; RV64-BOTH-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-BOTH-NEXT:    vmv.v.i v8, 0
 ; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 224
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 208
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 192
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 176
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 160
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 144
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 128
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 112
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 96
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 80
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 64
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 48
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 32
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
+; RV64-BOTH-NEXT:    addi a1, a0, 16
+; RV64-BOTH-NEXT:    vse64.v v8, (a1)
 ; RV64-BOTH-NEXT:    vse64.v v8, (a0)
 ; RV64-BOTH-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 256, i1 0)

diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
index b820023c961aa1..565d3588710e36 100644
--- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -767,61 +767,20 @@ define void @merge_vec_stores_from_loads(<4 x float>* %v, <4 x float>* %ptr) {
 
 }
 
-define void @merge_vec_stores_of_zero(<4 x i32>* %ptr) {
-; CHECK-LABEL: merge_vec_stores_of_zero:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vmovups %ymm0, 48(%rdi)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
-  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
-  store <4 x i32> zeroinitializer, <4 x i32>* %idx0, align 16
-  store <4 x i32> zeroinitializer, <4 x i32>* %idx1, align 16
-  ret void
-}
-
-define void @merge_vec_stores_of_constant_splat(<4 x i32>* %ptr) {
-; CHECK-LABEL: merge_vec_stores_of_constant_splat:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
-; CHECK-NEXT:    vmovaps %xmm0, 48(%rdi)
-; CHECK-NEXT:    vmovaps %xmm0, 64(%rdi)
-; CHECK-NEXT:    retq
-  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
-  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
-  store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %idx0, align 16
-  store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %idx1, align 16
-  ret void
-}
-
+; Merging vector stores when sourced from a constant vector is not currently handled.
 define void @merge_vec_stores_of_constants(<4 x i32>* %ptr) {
 ; CHECK-LABEL: merge_vec_stores_of_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [25,51,45,0]
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; CHECK-NEXT:    vmovaps %xmm0, 48(%rdi)
-; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [0,265,26,0]
 ; CHECK-NEXT:    vmovaps %xmm0, 64(%rdi)
 ; CHECK-NEXT:    retq
   %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
   %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
-  store <4 x i32> <i32 25, i32 51, i32 45, i32 0>, <4 x i32>* %idx0, align 16
-  store <4 x i32> <i32 0, i32 265, i32 26, i32 0>, <4 x i32>* %idx1, align 16
+  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx0, align 16
+  store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32>* %idx1, align 16
   ret void
-}
 
-define void @merge_vec_stores_of_constants_with_undefs(<4 x i32>* %ptr) {
-; CHECK-LABEL: merge_vec_stores_of_constants_with_undefs:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vmovups %ymm0, 48(%rdi)
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %idx0 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 3
-  %idx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 4
-  store <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32>* %idx0, align 16
-  store <4 x i32> <i32 0, i32 undef, i32 0, i32 0>, <4 x i32>* %idx1, align 16
-  ret void
 }
 
 ; This is a minimized test based on real code that was failing.
@@ -896,17 +855,17 @@ define void @merge_const_store_heterogeneous(i32 %count, %struct.C* nocapture %p
 ; CHECK-LABEL: merge_const_store_heterogeneous:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jle .LBB23_3
+; CHECK-NEXT:    jle .LBB20_3
 ; CHECK-NEXT:  # %bb.1: # %.lr.ph.preheader
 ; CHECK-NEXT:    movabsq $578437695752307201, %rax # imm = 0x807060504030201
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  .LBB23_2: # %.lr.ph
+; CHECK-NEXT:  .LBB20_2: # %.lr.ph
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movq %rax, (%rsi)
 ; CHECK-NEXT:    addq $24, %rsi
 ; CHECK-NEXT:    decl %edi
-; CHECK-NEXT:    jne .LBB23_2
-; CHECK-NEXT:  .LBB23_3: # %._crit_edge
+; CHECK-NEXT:    jne .LBB20_2
+; CHECK-NEXT:  .LBB20_3: # %._crit_edge
 ; CHECK-NEXT:    retq
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge

diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 9e689341f7b88e..d9f051c69cb237 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -4211,33 +4211,13 @@ entry:
 }
 
 define void @store_v128i1_constant(ptr %R) {
-; KNL-LABEL: store_v128i1_constant:
-; KNL:       ## %bb.0: ## %entry
-; KNL-NEXT:    vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
-; KNL-NEXT:    vmovaps %xmm0, (%rdi)
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: store_v128i1_constant:
-; SKX:       ## %bb.0: ## %entry
-; SKX-NEXT:    movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
-; SKX-NEXT:    movq %rax, 8(%rdi)
-; SKX-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; SKX-NEXT:    movq %rax, (%rdi)
-; SKX-NEXT:    retq
-;
-; AVX512BW-LABEL: store_v128i1_constant:
-; AVX512BW:       ## %bb.0: ## %entry
-; AVX512BW-NEXT:    movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
-; AVX512BW-NEXT:    movq %rax, 8(%rdi)
-; AVX512BW-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
-; AVX512BW-NEXT:    movq %rax, (%rdi)
-; AVX512BW-NEXT:    retq
-;
-; AVX512DQ-LABEL: store_v128i1_constant:
-; AVX512DQ:       ## %bb.0: ## %entry
-; AVX512DQ-NEXT:    vmovaps {{.*#+}} xmm0 = [61437,65535,65403,57343,57341,65535,65467,49151]
-; AVX512DQ-NEXT:    vmovaps %xmm0, (%rdi)
-; AVX512DQ-NEXT:    retq
+; CHECK-LABEL: store_v128i1_constant:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
+; CHECK-NEXT:    movq %rax, 8(%rdi)
+; CHECK-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
+; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: store_v128i1_constant:
 ; X86:       ## %bb.0: ## %entry

