[llvm] r342709 - Merge clang's isRepeatedBytePattern with LLVM's isBytewiseValue

JF Bastien via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 20 22:17:43 PDT 2018


Author: jfb
Date: Thu Sep 20 22:17:42 2018
New Revision: 342709

URL: http://llvm.org/viewvc/llvm-project?rev=342709&view=rev
Log:
Merge clang's isRepeatedBytePattern with LLVM's isBytewiseValue

Summary:
This code was in CGDecl.cpp and really belongs in LLVM's isBytewiseValue. Teach isBytewiseValue the tricks clang's isRepeatedBytePattern had, including merging undef properly and recursing on more types.
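
To make that concrete, here are a few hypothetical constants (not taken from the patch; the global names are made up), annotated with what the merged isBytewiseValue reports for each:

  @arr_undef = constant [3 x i32] [i32 -1, i32 undef, i32 -1]   ; undef merges into the -1 pattern: i8 -1
  @padded    = constant { i8, [3 x i8] } { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }  ; recurses into struct fields: i8 -86
  @all_undef = constant i32 undef                               ; entirely undef: reported as i8 undef
  @no_splat  = constant i16 4660                                ; 0x1234 is not a repeated byte: still null

Reporting undef for the all-undef case lets callers treat it as compatible with any byte, which is what the LoopIdiomRecognize and MemCpyOptimizer changes below rely on.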

clang part of this patch: D51752

Subscribers: dexonsmith, llvm-commits

Differential Revision: https://reviews.llvm.org/D51751

Modified:
    llvm/trunk/include/llvm/Analysis/ValueTracking.h
    llvm/trunk/lib/Analysis/ValueTracking.cpp
    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
    llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll
    llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll

Modified: llvm/trunk/include/llvm/Analysis/ValueTracking.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ValueTracking.h?rev=342709&r1=342708&r2=342709&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ValueTracking.h (original)
+++ llvm/trunk/include/llvm/Analysis/ValueTracking.h Thu Sep 20 22:17:42 2018
@@ -221,7 +221,8 @@ class Value;
   /// return the i8 value that it is represented with. This is true for all i8
   /// values obviously, but is also true for i32 0, i32 -1, i16 0xF0F0, double
   /// 0.0 etc. If the value can't be handled with a repeated byte store (e.g.
-  /// i16 0x1234), return null.
+  /// i16 0x1234), return null. If the value is entirely undef and padding,
+  /// return undef.
   Value *isBytewiseValue(Value *V);
 
   /// Given an aggregrate and an sequence of indices, see if the scalar value

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=342709&r1=342708&r2=342709&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Thu Sep 20 22:17:42 2018
@@ -3042,62 +3042,92 @@ bool llvm::isKnownNeverNaN(const Value *
   return true;
 }
 
-/// If the specified value can be set by repeating the same byte in memory,
-/// return the i8 value that it is represented with.  This is
-/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
-/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
-/// byte store (e.g. i16 0x1234), return null.
 Value *llvm::isBytewiseValue(Value *V) {
+
   // All byte-wide stores are splatable, even of arbitrary variables.
-  if (V->getType()->isIntegerTy(8)) return V;
+  if (V->getType()->isIntegerTy(8))
+    return V;
+
+  LLVMContext &Ctx = V->getContext();
+
+  // Undef don't care.
+  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
+  if (isa<UndefValue>(V))
+    return UndefInt8;
+
+  Constant *C = dyn_cast<Constant>(V);
+  if (!C) {
+    // Conceptually, we could handle things like:
+    //   %a = zext i8 %X to i16
+    //   %b = shl i16 %a, 8
+    //   %c = or i16 %a, %b
+    // but until there is an example that actually needs this, it doesn't seem
+    // worth worrying about.
+    return nullptr;
+  }
 
   // Handle 'null' ConstantArrayZero etc.
-  if (Constant *C = dyn_cast<Constant>(V))
-    if (C->isNullValue())
-      return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+  if (C->isNullValue())
+    return Constant::getNullValue(Type::getInt8Ty(Ctx));
 
-  // Constant float and double values can be handled as integer values if the
+  // Constant floating-point values can be handled as integer values if the
   // corresponding integer value is "byteable".  An important case is 0.0.
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
-    if (CFP->getType()->isFloatTy())
-      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
-    if (CFP->getType()->isDoubleTy())
-      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+    Type *Ty = nullptr;
+    if (CFP->getType()->isHalfTy())
+      Ty = Type::getInt16Ty(Ctx);
+    else if (CFP->getType()->isFloatTy())
+      Ty = Type::getInt32Ty(Ctx);
+    else if (CFP->getType()->isDoubleTy())
+      Ty = Type::getInt64Ty(Ctx);
     // Don't handle long double formats, which have strange constraints.
+    return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty)) : nullptr;
   }
 
   // We can handle constant integers that are multiple of 8 bits.
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
     if (CI->getBitWidth() % 8 == 0) {
       assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
-
       if (!CI->getValue().isSplat(8))
         return nullptr;
-      return ConstantInt::get(V->getContext(), CI->getValue().trunc(8));
+      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
     }
   }
 
-  // A ConstantDataArray/Vector is splatable if all its members are equal and
-  // also splatable.
-  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
-    Value *Elt = CA->getElementAsConstant(0);
-    Value *Val = isBytewiseValue(Elt);
-    if (!Val)
+  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
+    if (LHS == RHS)
+      return LHS;
+    if (!LHS || !RHS)
       return nullptr;
-
-    for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
-      if (CA->getElementAsConstant(I) != Elt)
+    if (LHS == UndefInt8)
+      return RHS;
+    if (RHS == UndefInt8)
+      return LHS;
+    return nullptr;
+  };
+
+  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
+    Value *Val = UndefInt8;
+    for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
+      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I)))))
         return nullptr;
+    return Val;
+  }
 
+  if (isa<ConstantVector>(C)) {
+    Constant *Splat = cast<ConstantVector>(C)->getSplatValue();
+    return Splat ? isBytewiseValue(Splat) : nullptr;
+  }
+
+  if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
+    Value *Val = UndefInt8;
+    for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
+      if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I)))))
+        return nullptr;
     return Val;
   }
 
-  // Conceptually, we could handle things like:
-  //   %a = zext i8 %X to i16
-  //   %b = shl i16 %a, 8
-  //   %c = or i16 %a, %b
-  // but until there is an example that actually needs this, it doesn't seem
-  // worth worrying about.
+  // Don't try to handle the handful of other constants.
   return nullptr;
 }
 

Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=342709&r1=342708&r2=342709&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Thu Sep 20 22:17:42 2018
@@ -348,6 +348,9 @@ static APInt getStoreStride(const SCEVAd
 /// Note that we don't ever attempt to use memset_pattern8 or 4, because these
 /// just replicate their input array and then pass on to memset_pattern16.
 static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
+  // FIXME: This could check for UndefValue because it can be merged into any
+  // other valid pattern.
+
   // If the value isn't a constant, we can't promote it to being in a constant
   // array.  We could theoretically do a store to an alloca or something, but
   // that doesn't seem worthwhile.
@@ -645,9 +648,13 @@ bool LoopIdiomRecognize::processLoopStor
 
       if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
         if (For == ForMemset::Yes) {
+          if (isa<UndefValue>(FirstSplatValue))
+            FirstSplatValue = SecondSplatValue;
           if (FirstSplatValue != SecondSplatValue)
             continue;
         } else {
+          if (isa<UndefValue>(FirstPatternValue))
+            FirstPatternValue = SecondPatternValue;
           if (FirstPatternValue != SecondPatternValue)
             continue;
         }

Modified: llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp?rev=342709&r1=342708&r2=342709&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp Thu Sep 20 22:17:42 2018
@@ -413,7 +413,10 @@ Instruction *MemCpyOptPass::tryMergingIn
       if (!NextStore->isSimple()) break;
 
       // Check to see if this stored value is of the same byte-splattable value.
-      if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
+      Value *StoredByte = isBytewiseValue(NextStore->getOperand(0));
+      if (isa<UndefValue>(ByteVal) && StoredByte)
+        ByteVal = StoredByte;
+      if (ByteVal != StoredByte)
         break;
 
       // Check to see if this store is to a constant offset from the start ptr.
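
To see what the tryMergingIntoMemset change buys, consider a hypothetical run of stores like the following (this IR is not part of the patch; the function and value names are illustrative). The leading store of undef previously had no byte pattern at all, so it could not head a memset run; it now reports an undef byte, which is simply replaced by the first concrete splat byte that follows, so the whole run can be folded into one memset of -1, subject to the pass's usual profitability heuristics:

  define void @merge_undef_then_splat(i32* %p) {
    %p1 = getelementptr i32, i32* %p, i64 1
    %p2 = getelementptr i32, i32* %p, i64 2
    %p3 = getelementptr i32, i32* %p, i64 3
    store i32 undef, i32* %p      ; isBytewiseValue now reports i8 undef
    store i32 -1, i32* %p1        ; i8 -1 replaces the undef pattern
    store i32 -1, i32* %p2
    store i32 -1, i32* %p3
    ret void
  }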

Modified: llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll?rev=342709&r1=342708&r2=342709&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll (original)
+++ llvm/trunk/test/Transforms/MemCpyOpt/fca2memcpy.ll Thu Sep 20 22:17:42 2018
@@ -73,13 +73,16 @@ define void @copyalias(%S* %src, %S* %ds
   ret void
 }
 
-; If the store address is computed ina complex manner, make
+; If the store address is computed in a complex manner, make
 ; sure we lift the computation as well if needed and possible.
 define void @addrproducer(%S* %src, %S* %dst) {
-; CHECK-LABEL: addrproducer
-; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1
-; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
-; CHECK-NEXT: store %S undef, %S* %dst
+; CHECK-LABEL: addrproducer(
+; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i64 1
+; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
+; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
 ; CHECK-NEXT: ret void
   %1 = load %S, %S* %src
   store %S undef, %S* %dst
@@ -89,7 +92,14 @@ define void @addrproducer(%S* %src, %S*
 }
 
 define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
-; CHECK-LABEL: aliasaddrproducer
+; CHECK-LABEL: aliasaddrproducer(
+; CHECK-NEXT: %[[SRC:[0-9]+]] = load %S, %S* %src
+; CHECK-NEXT: %[[DSTCAST:[0-9]+]] = bitcast %S* %dst to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[DSTCAST]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: %dstindex = load i32, i32* %dstidptr
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
+; CHECK-NEXT: store %S %[[SRC]], %S* %dst2
+; CHECK-NEXT: ret void
   %1 = load %S, %S* %src
   store %S undef, %S* %dst
   %dstindex = load i32, i32* %dstidptr
@@ -99,7 +109,16 @@ define void @aliasaddrproducer(%S* %src,
 }
 
 define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) {
-; CHECK-LABEL: noaliasaddrproducer
+; CHECK-LABEL: noaliasaddrproducer(
+; CHECK-NEXT: %[[SRCCAST:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: %[[LOADED:[0-9]+]] = load i32, i32* %dstidptr
+; CHECK-NEXT: %dstindex = or i32 %[[LOADED]], 1
+; CHECK-NEXT: %dst2 = getelementptr %S, %S* %dst, i32 %dstindex
+; CHECK-NEXT: %[[DST2CAST:[0-9]+]] = bitcast %S* %dst2 to i8*
+; CHECK-NEXT: %[[SRCCAST2:[0-9]+]] = bitcast %S* %src to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[DST2CAST]], i8* align 8 %[[SRCCAST2]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 %[[SRCCAST]], i8 undef, i64 16, i1 false)
+; CHECK-NEXT: ret void
   %1 = load %S, %S* %src
   store %S undef, %S* %src
   %2 = load i32, i32* %dstidptr

Modified: llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll?rev=342709&r1=342708&r2=342709&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll (original)
+++ llvm/trunk/test/Transforms/MemCpyOpt/memcpy-to-memset.ll Thu Sep 20 22:17:42 2018
@@ -1,19 +1,89 @@
 ; RUN: opt -memcpyopt -S < %s | FileCheck %s
 
- at cst = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
-
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
-declare void @foo(i32*) nounwind
 
-define void @test1() nounwind {
-  %arr = alloca [3 x i32], align 4
-  %arr_i8 = bitcast [3 x i32]* %arr to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %arr_i8, i8* align 4 bitcast ([3 x i32]* @cst to i8*), i64 12, i1 false)
-  %arraydecay = getelementptr inbounds [3 x i32], [3 x i32]* %arr, i64 0, i64 0
-  call void @foo(i32* %arraydecay) nounwind
-  ret void
-; CHECK-LABEL: @test1(
-; CHECK: call void @llvm.memset
-; CHECK-NOT: call void @llvm.memcpy
-; CHECK: ret void
+ at undef = internal constant i32 undef, align 4
+define void @test_undef() nounwind {
+  %a = alloca i32, align 4
+  %i8 = bitcast i32* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (i32* @undef to i8*), i64 4, i1 false)
+  ret void
+; CHECK-LABEL: @test_undef(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+ at i32x3 = internal constant [3 x i32] [i32 -1, i32 -1, i32 -1], align 4
+define void @test_i32x3() nounwind {
+  %a = alloca [3 x i32], align 4
+  %i8 = bitcast [3 x i32]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3 to i8*), i64 12, i1 false)
+  ret void
+; CHECK-LABEL: @test_i32x3(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+ at i32x3_undef = internal constant [3 x i32] [i32 -1, i32 undef, i32 -1], align 4
+define void @test_i32x3_undef() nounwind {
+  %a = alloca [3 x i32], align 4
+  %i8 = bitcast [3 x i32]* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast ([3 x i32]* @i32x3_undef to i8*), i64 12, i1 false)
+  ret void
+; CHECK-LABEL: @test_i32x3_undef(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+%struct.bitfield = type { i8, [3 x i8] }
+ at bitfield = private unnamed_addr constant %struct.bitfield { i8 -86, [3 x i8] [i8 -86, i8 -86, i8 -86] }, align 4
+define void @test_bitfield() nounwind {
+  %a = alloca %struct.bitfield, align 4
+  %i8 = bitcast %struct.bitfield* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (%struct.bitfield* @bitfield to i8*), i64 4, i1 false)
+  ret void
+; CHECK-LABEL: @test_bitfield(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+ at i1x16_zero = internal constant <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, align 4
+define void @test_i1x16_zero() nounwind {
+  %a = alloca <16 x i1>, align 4
+  %i8 = bitcast <16 x i1>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_zero to i8*), i64 16, i1 false)
+  ret void
+; CHECK-LABEL: @test_i1x16_zero(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+; i1 isn't currently handled. Should it?
+ at i1x16_one = internal constant <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, align 4
+define void @test_i1x16_one() nounwind {
+  %a = alloca <16 x i1>, align 4
+  %i8 = bitcast <16 x i1>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (<16 x i1>* @i1x16_one to i8*), i64 16, i1 false)
+  ret void
+; CHECK-LABEL: @test_i1x16_one(
+; CHECK-NOT:   call void @llvm.memset
+; CHECK:      call void @llvm.memcpy
+; CHECK:       ret void
+}
+
+ at half = internal constant half 0xH0000, align 4
+define void @test_half() nounwind {
+  %a = alloca half, align 4
+  %i8 = bitcast half* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %i8, i8* align 4 bitcast (half* @half to i8*), i64 2, i1 false)
+  ret void
+; CHECK-LABEL: @test_half(
+; CHECK:       call void @llvm.memset
+; CHECK-NOT:   call void @llvm.memcpy
+; CHECK:       ret void
 }



