[llvm-commits] [llvm] r92098 - in /llvm/trunk: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h lib/Transforms/Scalar/SimplifyLibCalls.cpp test/CodeGen/X86/memcmp.ll test/Transforms/SimplifyLibCalls/memcmp.ll

Chris Lattner sabre at nondot.org
Wed Dec 23 16:37:38 PST 2009


Author: lattner
Date: Wed Dec 23 18:37:38 2009
New Revision: 92098

URL: http://llvm.org/viewvc/llvm-project?rev=92098&view=rev
Log:
Move an optimization for memcmp out of SimplifyLibCalls and into
SDISel.  This optimization was causing SimplifyLibCalls to
introduce type-unsafe nastiness.  This is the first step; I'll be
expanding the memcmp optimizations shortly, covering things that
we really, really wouldn't want SimplifyLibCalls to do.

Added:
    llvm/trunk/test/CodeGen/X86/memcmp.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
    llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp
    llvm/trunk/test/Transforms/SimplifyLibCalls/memcmp.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=92098&r1=92097&r2=92098&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Wed Dec 23 18:37:38 2009
@@ -17,6 +17,7 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Constants.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DerivedTypes.h"
@@ -5075,6 +5076,105 @@
   }
 }
 
+/// IsOnlyUsedInZeroEqualityComparison - Return true if every use of V is an
+/// equality icmp whose operand 1 is a null constant (also true if V is unused).
+static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+      if (IC->isEquality())
+        if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))  // Operand 1 only.
+          if (C->isNullValue())
+            continue;  // This use is acceptable; check the next one.
+    // Any other user, or an icmp that fails one of the tests above, rejects V.
+    return false;
+  }
+  return true;
+}
+/// getMemCmpLoad - Return *PtrVal as an i16 (Size == 2) or i32 (otherwise).
+static SDValue getMemCmpLoad(Value *PtrVal, unsigned Size,
+                             SelectionDAGBuilder &Builder) {
+  MVT LoadVT;
+  const Type *LoadTy;
+  if (Size == 2) {
+    LoadVT = MVT::i16;
+    LoadTy = Type::getInt16Ty(PtrVal->getContext());
+  } else {  // Any Size other than 2 is handled as a 32-bit load.
+    LoadVT = MVT::i32;
+    LoadTy = Type::getInt32Ty(PtrVal->getContext()); 
+  }
+  
+  // Check to see if this load can be trivially constant folded, e.g. if the
+  // input is from a string literal.
+  if (Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+    // Cast pointer to the type we really want to load.
+    LoadInput = ConstantExpr::getBitCast(LoadInput,
+                                         PointerType::getUnqual(LoadTy));
+    
+    if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD))
+      return Builder.getValue(LoadCst);  // Folded: no load node is created.
+  }
+  
+  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
+  // still constant memory, the input chain can be the entry node.
+  SDValue Root;
+  bool ConstantMemory = false;  // Set when AA reports constant memory below.
+  
+  // Do not serialize (non-volatile) loads of constant memory with anything.
+  if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+    Root = Builder.DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+    Root = Builder.DAG.getRoot();
+  }
+  
+  SDValue Ptr = Builder.getValue(PtrVal);
+  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+                                        Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+                                        false /*volatile*/, 1 /* align=1 */);
+  
+  if (!ConstantMemory)  // Constant-memory loads are not queued on PendingLoads.
+    Builder.PendingLoads.push_back(LoadVal.getValue(1));  // Presumably the chain.
+  return LoadVal;
+}
+
+
+/// visitMemCmpCall - Try to lower a memcmp call inline.  Only a constant size
+/// of 2 or 4 whose result is only compared ==/!= 0 is handled.  Return true if
+/// it was lowered here, otherwise false and it is lowered like a normal call.
+bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
+  // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
+  if (I.getNumOperands() != 4)  // Operands 1-3 are read as the arguments below.
+    return false;
+  
+  Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
+  if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
+      !isa<IntegerType>(I.getOperand(3)->getType()) ||
+      !isa<IntegerType>(I.getType()))
+    return false;    
+  
+  ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));  // Null if not constant.
+  
+  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
+  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
+  if (Size && (Size->getValue() == 2 || Size->getValue() == 4) &&
+      IsOnlyUsedInZeroEqualityComparison(&I)) {
+    SDValue LHSVal = getMemCmpLoad(LHS, Size->getZExtValue(), *this);
+    SDValue RHSVal = getMemCmpLoad(RHS, Size->getZExtValue(), *this);
+    
+    SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+                               ISD::SETNE);  // i1 result of "values differ".
+    EVT CallVT = TLI.getValueType(I.getType(), true);
+    setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));  // To the call's type.
+    return true;
+  }
+  
+  // Every other form of the call is left for the caller to lower.
+  return false;
+}
+
+
 void SelectionDAGBuilder::visitCall(CallInst &I) {
   const char *RenameFn = 0;
   if (Function *F = I.getCalledFunction()) {
@@ -5148,6 +5248,9 @@
                                    Tmp.getValueType(), Tmp));
           return;
         }
+      } else if (Name == "memcmp") {
+        if (visitMemCmpCall(I))
+          return;
       }
     }
   } else if (isa<InlineAsm>(I.getOperand(0))) {

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h?rev=92098&r1=92097&r2=92098&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h Wed Dec 23 18:37:38 2009
@@ -91,11 +91,13 @@
 
   DenseMap<const Value*, SDValue> NodeMap;
 
+public:
   /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
   /// them up and then emit token factor nodes when possible.  This allows us to
   /// get simple disambiguation between loads without worrying about alias
   /// analysis.
   SmallVector<SDValue, 8> PendingLoads;
+private:
 
   /// PendingExports - CopyToReg nodes that copy values to virtual registers
   /// for export to other blocks need to be emitted before any terminator
@@ -461,6 +463,8 @@
   void visitStore(StoreInst &I);
   void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
   void visitCall(CallInst &I);
+  bool visitMemCmpCall(CallInst &I);
+  
   void visitInlineAsm(CallSite CS);
   const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
   void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);

Modified: llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp?rev=92098&r1=92097&r2=92098&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/SimplifyLibCalls.cpp Wed Dec 23 18:37:38 2009
@@ -1011,19 +1011,6 @@
       return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
     }
 
-    // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS)  != 0
-    // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS)  != 0
-    if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) {
-      const Type *PTy = PointerType::getUnqual(Len == 2 ?
-                       Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context));
-      LHS = B.CreateBitCast(LHS, PTy, "tmp");
-      RHS = B.CreateBitCast(RHS, PTy, "tmp");
-      LoadInst *LHSV = B.CreateLoad(LHS, "lhsv");
-      LoadInst *RHSV = B.CreateLoad(RHS, "rhsv");
-      LHSV->setAlignment(1); RHSV->setAlignment(1);  // Unaligned loads.
-      return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
-    }
-
     // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
     std::string LHSStr, RHSStr;
     if (GetConstantStringInfo(LHS, LHSStr) &&

Added: llvm/trunk/test/CodeGen/X86/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcmp.ll?rev=92098&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcmp.ll (added)
+++ llvm/trunk/test/CodeGen/X86/memcmp.ll Wed Dec 23 18:37:38 2009
@@ -0,0 +1,76 @@
+; RUN: llc %s -o - -march=x86-64 | FileCheck %s
+
+; This tests codegen time inlining/optimization of memcmp
+; rdar://6480398
+
+@.str = private constant [6 x i8] c"fooxx\00", align 1 ; <[6 x i8]*> [#uses=1]
+
+declare i32 @memcmp(...)
+
+define void @memcmp2(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; 2-byte memcmp of two pointer arguments
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 2) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] - the only use of the memcmp result
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4                   ; reached only when the memcmp result is nonzero
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp2:
+; CHECK: movw    (%rsi), %ax
+; CHECK: cmpw    %ax, (%rdi)
+}
+
+define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind { ; 2-byte memcmp against constant bytes at @.str+1 ("oo" = 0x6F6F = 28527)
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] - the only use of the memcmp result
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4                   ; reached only when the memcmp result is nonzero
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp2a:
+; CHECK: cmpw    $28527, (%rdi)
+}
+
+
+define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { ; 4-byte memcmp of two pointer arguments
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] - the only use of the memcmp result
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4                   ; reached only when the memcmp result is nonzero
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp4:
+; CHECK: movl    (%rsi), %eax
+; CHECK: cmpl    %eax, (%rdi)
+}
+
+define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind { ; 4-byte memcmp against constant bytes at @.str+1 ("ooxx" = 0x78786F6F = 2021158767)
+entry:
+  %0 = tail call i32 (...)* @memcmp(i8* %X, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 1), i32 4) nounwind ; <i32> [#uses=1]
+  %1 = icmp eq i32 %0, 0                          ; <i1> [#uses=1] - the only use of the memcmp result
+  br i1 %1, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  store i32 4, i32* %P, align 4                   ; reached only when the memcmp result is nonzero
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+; CHECK: memcmp4a:
+; CHECK: cmpl $2021158767, (%rdi)
+}
+

Modified: llvm/trunk/test/Transforms/SimplifyLibCalls/memcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SimplifyLibCalls/memcmp.ll?rev=92098&r1=92097&r2=92098&view=diff

==============================================================================
--- llvm/trunk/test/Transforms/SimplifyLibCalls/memcmp.ll (original)
+++ llvm/trunk/test/Transforms/SimplifyLibCalls/memcmp.ll Wed Dec 23 18:37:38 2009
@@ -14,9 +14,6 @@
 	volatile store i32 %B, i32* %IP
 	%C = call i32 @memcmp( i8* %P, i8* %Q, i32 1 )		; <i32> [#uses=1]
 	volatile store i32 %C, i32* %IP
-	%D = call i32 @memcmp( i8* %P, i8* %Q, i32 2 )		; <i32> [#uses=1]
-	%E = icmp eq i32 %D, 0		; <i1> [#uses=1]
-	volatile store i1 %E, i1* %BP
         %F = call i32 @memcmp(i8* getelementptr ([4 x i8]* @hel, i32 0, i32 0),
                               i8* getelementptr ([8 x i8]* @hello_u, i32 0, i32 0),
                               i32 3)





More information about the llvm-commits mailing list