[llvm-commits] [llvm] r165267 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/MergeConsecutiveStores.ll

Nadav Rotem nrotem at apple.com
Thu Oct 4 15:35:15 PDT 2012


Author: nadav
Date: Thu Oct  4 17:35:15 2012
New Revision: 165267

URL: http://llvm.org/viewvc/llvm-project?rev=165267&view=rev
Log:
When merging consecutive stores, use vectors to store the constant zero.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=165267&r1=165266&r2=165267&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Oct  4 17:35:15 2012
@@ -7570,13 +7570,20 @@
   if (!IsLoadSrc) {
     unsigned LastConst = 0;
     unsigned LastLegalType = 0;
+    unsigned LastLegalVectorType = 0;
+    bool NonZero = false;
     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
       SDValue StoredVal = St->getValue();
-      bool IsConst = (isa<ConstantSDNode>(StoredVal) ||
-                      isa<ConstantFPSDNode>(StoredVal));
-      if (!IsConst)
+
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+        NonZero |= (C->getZExtValue() != 0);
+      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+        NonZero |= C->getValueAPF().bitcastToAPInt().getZExtValue();
+      } else {
+        // Non constant.
         break;
+      }
 
       // Mark this index as the largest legal constant.
       LastConst = i;
@@ -7586,16 +7593,27 @@
       EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
       if (TLI.isTypeLegal(StoreTy))
         LastLegalType = i+1;
+
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+      if (TLI.isTypeLegal(Ty))
+        LastLegalVectorType = i + 1;
     }
 
+    // We only use vectors if the constant is known to be zero.
+    if (NonZero)
+      LastLegalVectorType = 0;
+
     // Check if we found a legal integer type to store.
-    if (LastLegalType == 0)
+    if (LastLegalType == 0 && LastLegalVectorType == 0)
       return false;
 
-    // We add a +1 because the LastXXX variables refer to array location
-    // while NumElem holds the size.
-    unsigned NumElem = std::min(LastConsecutiveStore, LastConst) + 1;
-    NumElem = std::min(LastLegalType, NumElem);
+    bool UseVector = LastLegalVectorType > LastLegalType;
+    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+    // Make sure we have something to merge.
+    if (NumElem < 2)
+      return false;
 
     unsigned EarliestNodeUsed = 0;
     for (unsigned i=0; i < NumElem; ++i) {
@@ -7609,36 +7627,41 @@
 
     // The earliest Node in the DAG.
     LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
-
-    // Make sure we have something to merge.
-    if (NumElem < 2)
-      return false;
-
     DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
-    unsigned StoreBW = NumElem * ElementSizeBytes * 8;
-    APInt StoreInt(StoreBW, 0);
 
-    // Construct a single integer constant which is made of the smaller
-    // constant inputs.
-    bool IsLE = TLI.isLittleEndian();
-    for (unsigned i = 0; i < NumElem ; ++i) {
-      unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
-      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
-      SDValue Val = St->getValue();
-      StoreInt<<=ElementSizeBytes*8;
-      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
-        StoreInt|=C->getAPIntValue().zext(StoreBW);
-      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
-        StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
-      } else {
-        assert(false && "Invalid constant element type");
+    SDValue StoredVal;
+    if (UseVector) {
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+      StoredVal = DAG.getConstant(0, Ty);
+    } else {
+      unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+      APInt StoreInt(StoreBW, 0);
+
+      // Construct a single integer constant which is made of the smaller
+      // constant inputs.
+      bool IsLE = TLI.isLittleEndian();
+      for (unsigned i = 0; i < NumElem ; ++i) {
+        unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
+        StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+        SDValue Val = St->getValue();
+        StoreInt<<=ElementSizeBytes*8;
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+          StoreInt|=C->getAPIntValue().zext(StoreBW);
+        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+          StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+        } else {
+          assert(false && "Invalid constant element type");
+        }
       }
+
+      // Create the new Load and Store operations.
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      StoredVal = DAG.getConstant(StoreInt, StoreTy);
     }
 
-    // Create the new Load and Store operations.
-    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
-    SDValue WideInt = DAG.getConstant(StoreInt, StoreTy);
-    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, WideInt,
+    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
                                     FirstInChain->getBasePtr(),
                                     FirstInChain->getPointerInfo(),
                                     false, false,
@@ -8027,7 +8050,7 @@
   }
 
   // Only perform this optimization before the types are legal, because we
-  // don't want to perform this optimization multiple times.
+  // don't want to perform this optimization on every DAGCombine invocation.
   if (!LegalTypes && MergeConsecutiveStores(ST))
     return SDValue(N, 0);
 

Modified: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll?rev=165267&r1=165266&r2=165267&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll (original)
+++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll Thu Oct  4 17:35:15 2012
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -6,7 +6,6 @@
 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
 
-; Move all of the constants using a single vector store.
 ; CHECK: merge_const_store
 ; save 1,2,3 ... as one big integer.
 ; CHECK: movabsq $578437695752307201
@@ -41,6 +40,40 @@
   ret void
 }
 
+; Move the constants using a single vector store.
+; CHECK: merge_const_store_vec
+; CHECK: vmovups  %ymm0, (%rsi)
+; CHECK: ret
+define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
+  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
+  store i32 0, i32* %2, align 4
+  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
+  store i32 0, i32* %3, align 4
+  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
+  store i32 0, i32* %4, align 4
+  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
+  store i32 0, i32* %5, align 4
+  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
+  store i32 0, i32* %6, align 4
+  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
+  store i32 0, i32* %7, align 4
+  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
+  store i32 0, i32* %8, align 4
+  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
+  store i32 0, i32* %9, align 4
+  %10 = add nsw i32 %i.02, 1
+  %11 = getelementptr inbounds %struct.B* %.01, i64 1
+  %exitcond = icmp eq i32 %10, %count
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+  ret void
+}
+
 ; Move the first 4 constants as a single vector. Move the rest as scalars.
 ; CHECK: merge_nonconst_store
 ; CHECK: movl $67305985
@@ -223,7 +256,6 @@
   ret void
 }
 
-
 ;CHECK: merge_loads_no_align
 ; load:
 ;CHECK: movl





More information about the llvm-commits mailing list