[llvm] e80e134 - [InstCombine] Add support for stpncpy folding

Martin Sebor via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 27 13:44:44 PDT 2022


Author: Martin Sebor
Date: 2022-09-27T14:44:33-06:00
New Revision: e80e134c77bb093370a3e4fee41ebe8710e3564d

URL: https://github.com/llvm/llvm-project/commit/e80e134c77bb093370a3e4fee41ebe8710e3564d
DIFF: https://github.com/llvm/llvm-project/commit/e80e134c77bb093370a3e4fee41ebe8710e3564d.diff

LOG: [InstCombine] Add support for stpncpy folding

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D130922

Added: 
    llvm/test/Transforms/InstCombine/stpncpy-1.ll
    llvm/test/Transforms/InstCombine/strncpy-4.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
    llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
    llvm/test/Transforms/InstCombine/simplify-libcalls.ll
    llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
    llvm/test/Transforms/InstCombine/strncpy-1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 2817f2bcfbb9f..a9e26c076c7ec 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -183,6 +183,9 @@ class LibCallSimplifier {
   Value *optimizeRealloc(CallInst *CI, IRBuilderBase &B);
   Value *optimizeWcslen(CallInst *CI, IRBuilderBase &B);
   Value *optimizeBCopy(CallInst *CI, IRBuilderBase &B);
+
+  // Helper to optimize stpncpy and strncpy.
+  Value *optimizeStringNCpy(CallInst *CI, bool RetEnd, IRBuilderBase &B);
   // Wrapper for all String/Memory Library Call Optimizations
   Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilderBase &B);
 

diff  --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 232ec6f586ac9..a463ae3dc542a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -773,40 +773,62 @@ Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
   return ConstantInt::get(CI->getType(), SrcLen);
 }
 
-// Optimize a call to strncpy.
-
-Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
+// Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy
+// otherwise.
+Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
+                                             IRBuilderBase &B) {
   Function *Callee = CI->getCalledFunction();
   Value *Dst = CI->getArgOperand(0);
   Value *Src = CI->getArgOperand(1);
   Value *Size = CI->getArgOperand(2);
-  annotateNonNullNoUndefBasedOnAccess(CI, 0);
-  if (isKnownNonZero(Size, DL))
+
+  if (isKnownNonZero(Size, DL)) {
+    // Both st{p,r}ncpy(D, S, N) access the source and destination arrays
+    // only when N is nonzero.
+    annotateNonNullNoUndefBasedOnAccess(CI, 0);
     annotateNonNullNoUndefBasedOnAccess(CI, 1);
+  }
 
-  uint64_t Len;
-  if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
-    Len = LengthArg->getZExtValue();
-  else
-    return nullptr;
+  // If the "bound" argument is known set N to it.  Otherwise set it to
+  // UINT64_MAX and handle it later.
+  uint64_t N = UINT64_MAX;
+  if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
+    N = SizeC->getZExtValue();
 
-  // strncpy(x, y, 0) -> x
-  if (Len == 0)
+  if (N == 0)
+    // Fold st{p,r}ncpy(D, S, 0) to D.
     return Dst;
 
-  // See if we can get the length of the input string.
+  if (N == 1) {
+    Type *CharTy = B.getInt8Ty();
+    Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0");
+    B.CreateStore(CharVal, Dst);
+    if (!RetEnd)
+      // Transform strncpy(D, S, 1) to return (*D = *S), D.
+      return Dst;
+
+    // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D.
+    Value *ZeroChar = ConstantInt::get(CharTy, 0);
+    Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp");
+
+    Value *Off1 = B.getInt32(1);
+    Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end");
+    return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel");
+  }
+
+  // If the length of the input string is known set SrcLen to it.
   uint64_t SrcLen = GetStringLength(Src);
-  if (SrcLen) {
+  if (SrcLen)
     annotateDereferenceableBytes(CI, 1, SrcLen);
-    --SrcLen; // Unbias length.
-  } else {
+  else
     return nullptr;
-  }
+
+  --SrcLen; // Unbias length.
 
   if (SrcLen == 0) {
-    // strncpy(x, "", y) -> memset(x, '\0', y)
+    // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N.
     Align MemSetAlign =
-        CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
+      CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
     CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
     AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0));
     NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
@@ -815,28 +837,37 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
     return Dst;
   }
 
-  // strncpy(a, "a", 4) - > memcpy(a, "a\0\0\0", 4)
-  if (Len > SrcLen + 1) {
-    if (Len <= 128) {
-      StringRef Str;
-      if (!getConstantStringInfo(Src, Str))
-        return nullptr;
-      std::string SrcStr = Str.str();
-      SrcStr.resize(Len, '\0');
-      Src = B.CreateGlobalString(SrcStr, "str");
-    } else {
+  if (N > SrcLen + 1) {
+    if (N > 128)
+      // Bail if N is large or unknown.
       return nullptr;
-    }
+
+    // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128.
+    StringRef Str;
+    if (!getConstantStringInfo(Src, Str))
+      return nullptr;
+    std::string SrcStr = Str.str();
+    // Create a bigger, nul-padded array with the same length, SrcLen,
+    // as the original string.
+    SrcStr.resize(N, '\0');
+    Src = B.CreateGlobalString(SrcStr, "str");
   }
 
   Type *PT = Callee->getFunctionType()->getParamType(0);
-  // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
+  // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
+  // S and N are constant.
   CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
-                                   ConstantInt::get(DL.getIntPtrType(PT), Len));
+                                   ConstantInt::get(DL.getIntPtrType(PT), N));
   NewCI->setAttributes(CI->getAttributes());
   NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
   copyFlags(*CI, NewCI);
-  return Dst;
+  if (!RetEnd)
+    return Dst;
+
+  // stpncpy(D, S, N) returns the address of the first null in D if it writes
+  // one, otherwise D + N.
+  Value *Off = B.getInt64(std::min(SrcLen, N));
+  return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr");
 }
 
 Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
@@ -3349,8 +3380,10 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
       return optimizeStpCpy(CI, Builder);
     case LibFunc_strlcpy:
       return optimizeStrLCpy(CI, Builder);
+    case LibFunc_stpncpy:
+      return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder);
     case LibFunc_strncpy:
-      return optimizeStrNCpy(CI, Builder);
+      return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder);
     case LibFunc_strlen:
       return optimizeStrLen(CI, Builder);
     case LibFunc_strnlen:

diff  --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
index d5553095fdace..0c834a9b3ed60 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
@@ -204,7 +204,7 @@ declare i16 @strcmp(i8*, i8*) #0
 
 define void @test9(i8* %x) {
 ; CHECK32-LABEL: @test9(
-; CHECK32-NEXT:    [[Y:%.*]] = call i16 @strcmp(i8* [[X:%.*]], i8* [[X]]) #[[ATTR5:[0-9]+]]
+; CHECK32-NEXT:    [[Y:%.*]] = call i16 @strcmp(i8* [[X:%.*]], i8* [[X]]) #[[ATTR6:[0-9]+]]
 ; CHECK32-NEXT:    ret void
 ;
 ; CHECK16-LABEL: @test9(
@@ -321,13 +321,13 @@ define i4 @strlen(i8* %s) {
   ret i4 0
 }
 
-; Test emission of stpncpy.
+; Test emission of stpncpy, including call attributes.
 @a = dso_local global [4 x i8] c"123\00"
 @b = dso_local global [5 x i8] zeroinitializer
 declare i8* @__stpncpy_chk(i8* noundef, i8* noundef, i32 noundef, i32 noundef)
 define signext i32 @emit_stpncpy() {
 ; CHECK-LABEL: @emit_stpncpy(
-; CHECK-NEXT:    [[STPNCPY:%.*]] = call i8* @stpncpy(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
+; CHECK-NEXT:    [[STPNCPY:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
 ; CHECK-NEXT:    ret i32 0
 ;
   %call = call i8* @__stpncpy_chk(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0),

diff  --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
index 2ee085c31ef1c..16ba967b8b8fb 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -338,7 +338,7 @@ define i4 @strlen(i8* %s) {
 declare i8* @__stpncpy_chk(i8* noundef, i8* noundef, i32 noundef, i32 noundef)
 define signext i32 @emit_stpncpy() {
 ; CHECK-LABEL: @emit_stpncpy(
-; CHECK-NEXT:    [[STPNCPY:%.*]] = call i8* @stpncpy(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
+; CHECK-NEXT:    [[STPNCPY:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
 ; CHECK-NEXT:    ret i32 0
 ;
   %call = call i8* @__stpncpy_chk(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0),

diff  --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
new file mode 100644
index 0000000000000..51e389e9c484e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
@@ -0,0 +1,466 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+;
+; Test that the stpncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -data-layout="E" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,BE
+; RUN: opt < %s -data-layout="e" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,LE
+
+declare i8* @stpncpy(i8*, i8*, i64)
+
+declare void @sink(i8*, i8*)
+
+@a4 = constant [4 x i8] c"1234"
+@s4 = constant [5 x i8] c"1234\00"
+
+
+; The following are generated by the stpncpy -> memcpy transformation
+; (trading space for speed).
+@str = private constant [4 x i8] c"4\00\00\00"
+@str.1 = private constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00"
+@str.2 = private constant [10 x i8] c"1234\00\00\00\00\00\00"
+@str.3 = private unnamed_addr constant [4 x i8] c"4\00\00\00", align 1
+@str.4 = private unnamed_addr constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00", align 1
+@str.5 = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+
+; Verify that the generated constants have the expected contents.
+; ANY: @[[A4:[a-zA-Z0-9_$"\\.-]+]] = constant [4 x i8] c"1234"
+; ANY: @[[S4:[a-zA-Z0-9_$"\\.-]+]] = constant [5 x i8] c"1234\00"
+; ANY: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private constant [4 x i8] c"4\00\00\00"
+; ANY: @[[STR_1:[a-zA-Z0-9_$"\\.-]+]] = private constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00"
+; ANY: @[[STR_2:[a-zA-Z0-9_$"\\.-]+]] = private constant [10 x i8] c"1234\00\00\00\00\00\00"
+; ANY: @[[STR_3:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [4 x i8] c"4\00\00\00", align 1
+; ANY: @[[STR_4:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00", align 1
+; ANY: @[[STR_5:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+; ANY: @[[STR_6:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [4 x i8] c"4\00\00\00", align 1
+; ANY: @[[STR_7:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00", align 1
+; ANY: @[[STR_8:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+; ANY: @[[STR_9:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+
+; Verify that exactly overlapping stpncpy(D, D, N) calls are transformed
+; to D + strnlen(D, N) or, equivalently, D + (*D != '\0'), when N < 2.
+
+define void @fold_stpncpy_overlap(i8* %dst, i64 %n) {
+; ANY-LABEL: @fold_stpncpy_overlap(
+; ANY-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; ANY-NEXT:    [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[DST]], align 1
+; ANY-NEXT:    [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
+; ANY-NEXT:    [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
+; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr i8, i8* [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT:    call void @sink(i8* nonnull [[DST]], i8* [[STPNCPY_SEL]])
+; ANY-NEXT:    ret void
+;
+; Fold stpncpy(D, D, 0) to just D.
+  %es_0 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 0)
+  call void @sink(i8* %dst, i8* %es_0)
+
+; Fold stpncpy(D, D, 1) to D + (*D != '\0').
+  %es_1 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 1)
+  call void @sink(i8* %dst, i8* %es_1)
+
+  ret void
+}
+
+
+; Verify that exactly overlapping stpncpy(D, D, N) calls are left alone
+; when N >= 2.  Such calls are strictly undefined and while simplifying
+; them to the expected result is possible there is little to gain from it.
+
+define void @call_stpncpy_overlap(i8* %dst, i64 %n) {
+; ANY-LABEL: @call_stpncpy_overlap(
+; ANY-NEXT:    [[ES_2:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 2)
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[ES_2]])
+; ANY-NEXT:    [[ES_3:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 3)
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[ES_3]])
+; ANY-NEXT:    [[ES_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* [[DST]], i64 [[N:%.*]])
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[ES_N]])
+; ANY-NEXT:    ret void
+;
+; Do not transform stpncpy(D, D, 2).
+  %es_2 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 2)
+  call void @sink(i8* %dst, i8* %es_2)
+
+; Do not transform stpncpy(D, D, 3).
+  %es_3 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 3)
+  call void @sink(i8* %dst, i8* %es_3)
+
+; Do not transform stpncpy(D, D, N).
+  %es_n = call i8* @stpncpy(i8* %dst, i8* %dst, i64 %n)
+  call void @sink(i8* %dst, i8* %es_n)
+
+  ret void
+}
+
+
+; Verify that stpncpy(D, "", N) calls are transformed to memset(D, 0, N).
+
+define void @fold_stpncpy_s0(i8* %dst, i64 %n) {
+; ANY-LABEL: @fold_stpncpy_s0(
+; ANY-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; ANY-NEXT:    store i8 0, i8* [[DST]], align 1
+; ANY-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; ANY-NEXT:    store i16 0, i16* [[TMP1]], align 1
+; ANY-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT:    call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false)
+; ANY-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false)
+; ANY-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT:    ret void
+;
+  %ps0 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 4
+
+; Fold stpncpy(D, "", 0) to just D.
+  %es0_0 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 0)
+  call void @sink(i8* %dst, i8* %es0_0)
+
+; Transform stpncpy(D, "", 1) to *D = '\0', D.
+  %es0_1 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 1)
+  call void @sink(i8* %dst, i8* %es0_1)
+
+; Transform stpncpy(D, "", 2) to memset(D, 0, 2), D.
+  %es0_2 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 2)
+  call void @sink(i8* %dst, i8* %es0_2)
+
+; Transform stpncpy(D, "", 9) to memset(D, 0, 9), D.
+  %es0_9 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 9)
+  call void @sink(i8* %dst, i8* %es0_9)
+
+; Transform stpncpy(D, "", n) to memset(D, 0, n), D.
+  %es0_n = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 %n)
+  call void @sink(i8* %dst, i8* %es0_n)
+
+  ret void
+}
+
+
+; Verify that stpncpy(D, "4", N) calls are transformed to the equivalent
+; of strncpy(D, "4", N) and the result folded to D + (N != 0).
+
+define void @fold_stpncpy_s1(i8* %dst) {
+; BE-LABEL: @fold_stpncpy_s1(
+; BE-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; BE-NEXT:    store i8 52, i8* [[DST]], align 1
+; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; BE-NEXT:    store i16 13312, i16* [[TMP1]], align 1
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; BE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.6, i64 0, i64 0), i64 3, i1 false)
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; BE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.7, i64 0, i64 0), i64 9, i1 false)
+; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; BE-NEXT:    ret void
+;
+; LE-LABEL: @fold_stpncpy_s1(
+; LE-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; LE-NEXT:    store i8 52, i8* [[DST]], align 1
+; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; LE-NEXT:    store i16 52, i16* [[TMP1]], align 1
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.6, i64 0, i64 0), i64 3, i1 false)
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.7, i64 0, i64 0), i64 9, i1 false)
+; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; LE-NEXT:    ret void
+;
+  %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3
+
+; Fold stpncpy(D, "4", 0) to just D.
+  %es1_0 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 0)
+  call void @sink(i8* %dst, i8* %es1_0)
+
+; Transform stpncpy(D, "4", 1) to *D = '4', D + 1.
+  %es1_1 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 1)
+  call void @sink(i8* %dst, i8* %es1_1)
+
+; Transform stpncpy(D, "4", 2) to strncpy(D, "4", 2) + 1.
+  %es1_2 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 2)
+  call void @sink(i8* %dst, i8* %es1_2)
+
+; Transform stpncpy(D, "4", 3) to strncpy(D, "4", 3) + 1, which is then
+; transformed to memcpy(D, "4", 2), D[2] = '\0', D + 1.
+  %es1_3 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 3)
+  call void @sink(i8* %dst, i8* %es1_3)
+
+; Transform stpncpy(D, "4", 9) to strncpy(D, "4", 9) + 1.
+  %es1_9 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 9)
+  call void @sink(i8* %dst, i8* %es1_9)
+
+  ret void
+}
+
+
+; Verify that stpncpy(D, "1234", N) calls are transformed to the equivalent
+; of strncpy(D, "1234", N) and the result folded to D + min(4, N).
+
+define void @fold_stpncpy_s4(i8* %dst, i64 %n) {
+; BE-LABEL: @fold_stpncpy_s4(
+; BE-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; BE-NEXT:    store i8 49, i8* [[DST]], align 1
+; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; BE-NEXT:    store i16 12594, i16* [[TMP1]], align 1
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; BE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false)
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; BE-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; BE-NEXT:    store i32 825373492, i32* [[TMP2]], align 1
+; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; BE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.8, i64 0, i64 0), i64 9, i1 false)
+; BE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; BE-NEXT:    ret void
+;
+; LE-LABEL: @fold_stpncpy_s4(
+; LE-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; LE-NEXT:    store i8 49, i8* [[DST]], align 1
+; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; LE-NEXT:    store i16 12849, i16* [[TMP1]], align 1
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false)
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; LE-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; LE-NEXT:    store i32 875770417, i32* [[TMP2]], align 1
+; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.8, i64 0, i64 0), i64 9, i1 false)
+; LE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; LE-NEXT:    ret void
+;
+  %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0
+
+; Fold stpncpy(D, "1234", 0) to just D.
+  %es4_0 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 0)
+  call void @sink(i8* %dst, i8* %es4_0)
+
+; Transform stpncpy(D, "1234", 1) to *D = '1', D + 1.
+  %es4_1 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 1)
+  call void @sink(i8* %dst, i8* %es4_1)
+
+; Transform stpncpy(D, "1234", 2) to strncpy(D, "1234", 2) + 2.
+  %es4_2 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 2)
+  call void @sink(i8* %dst, i8* %es4_2)
+
+; Transform stpncpy(D, "1234", 3) to strncpy(D, "1234", 3) + 3
+  %es4_3 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 3)
+  call void @sink(i8* %dst, i8* %es4_3)
+
+; Transform stpncpy(D, "1234", 4) to strncpy(D, "1234", 4) + 4.
+  %es4_4 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 4)
+  call void @sink(i8* %dst, i8* %es4_4)
+
+; Transform stpncpy(D, "1234", 9) to strncpy(D, "1234", 9) + 4.
+  %es4_9 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 9)
+  call void @sink(i8* %dst, i8* %es4_9)
+
+  ret void
+}
+
+
+; Verify that a call to stpncpy(D, A, N) with a constant source larger
+; than one byte is left alone when N is unknown.
+
+define void @call_stpncpy_xx_n(i8* %dst, i64 %n) {
+; ANY-LABEL: @call_stpncpy_xx_n(
+; ANY-NEXT:    [[EA1_N:%.*]] = call i8* @stpncpy(i8* [[DST:%.*]], i8* dereferenceable(2) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 3), i64 [[N:%.*]])
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[EA1_N]])
+; ANY-NEXT:    [[EA4_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 [[N]])
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[EA4_N]])
+; ANY-NEXT:    [[ES1_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* dereferenceable(2) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 3), i64 [[N]])
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[ES1_N]])
+; ANY-NEXT:    [[ES4_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 [[N]])
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[ES4_N]])
+; ANY-NEXT:    ret void
+;
+; Do not transform stpncpy(D, A4 + 3, N) when N is unknown.
+  %pa1 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 3
+  %ea1_n = call i8* @stpncpy(i8* %dst, i8* %pa1, i64 %n)
+  call void @sink(i8* %dst, i8* %ea1_n)
+
+; Do not transform stpncpy(D, A4, N) when N is unknown.
+  %pa4 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 0
+  %ea4_n = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 %n)
+  call void @sink(i8* %dst, i8* %ea4_n)
+
+; Do not transform stpncpy(D, "4", N) when N is unknown.
+  %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3
+  %es1_n = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 %n)
+  call void @sink(i8* %dst, i8* %es1_n)
+
+; Likewise, do not transform stpncpy(D, "1234", N) when N is unknown.
+  %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0
+  %es4_n = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 %n)
+  call void @sink(i8* %dst, i8* %es4_n)
+
+  ret void
+}
+
+; Verify that stpncpy(D, (char[4]){"1234"}, N) calls with an unterminated
+; source array are transformed to the equivalent strncpy call and the result
+; folded to D + min(4, N).
+
+define void @fold_stpncpy_a4(i8* %dst, i64 %n) {
+; BE-LABEL: @fold_stpncpy_a4(
+; BE-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; BE-NEXT:    store i8 49, i8* [[DST]], align 1
+; BE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; BE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; BE-NEXT:    store i16 12594, i16* [[TMP1]], align 1
+; BE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; BE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 3, i1 false)
+; BE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; BE-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; BE-NEXT:    store i32 825373492, i32* [[TMP2]], align 1
+; BE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; BE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 5, i1 false)
+; BE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; BE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.9, i64 0, i64 0), i64 9, i1 false)
+; BE-NEXT:    [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR4]])
+; BE-NEXT:    ret void
+;
+; LE-LABEL: @fold_stpncpy_a4(
+; LE-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; LE-NEXT:    store i8 49, i8* [[DST]], align 1
+; LE-NEXT:    [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; LE-NEXT:    store i16 12849, i16* [[TMP1]], align 1
+; LE-NEXT:    [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 3, i1 false)
+; LE-NEXT:    [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; LE-NEXT:    [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; LE-NEXT:    store i32 875770417, i32* [[TMP2]], align 1
+; LE-NEXT:    [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 5, i1 false)
+; LE-NEXT:    [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; LE-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.9, i64 0, i64 0), i64 9, i1 false)
+; LE-NEXT:    [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR4]])
+; LE-NEXT:    ret void
+;
+
+  %pa4 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 0
+
+; Fold stpncpy(D, A4, 0) to just D.
+  %ea4_0 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 0)
+  call void @sink(i8* %dst, i8* %ea4_0)
+
+; Transform stpncpy(D, A4, 1) to *D = '1', D + 1.
+  %ea4_1 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 1)
+  call void @sink(i8* %dst, i8* %ea4_1)
+
+; Transform stpncpy(D, A4, 2) to strncpy(D, A4, 2) + 2.
+  %ea4_2 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 2)
+  call void @sink(i8* %dst, i8* %ea4_2)
+
+; Transform stpncpy(D, A4, 3) to strncpy(D, A4, 3) + 3.
+  %ea4_3 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 3)
+  call void @sink(i8* %dst, i8* %ea4_3)
+
+; Transform stpncpy(D, A4, 4) to strncpy(D, A4, 4) + 4.
+  %ea4_4 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 4)
+  call void @sink(i8* %dst, i8* %ea4_4)
+
+; Transform stpncpy(D, A4, 5) to strncpy(D, A4, 5) + 4.
+  %ea4_5 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 5)
+  call void @sink(i8* %dst, i8* %ea4_5)
+
+; Transform stpncpy(D, A4, 9) to strncpy(D, A4, 9) + 4.
+  %ea4_9 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 9)
+  call void @sink(i8* %dst, i8* %ea4_9)
+
+  ret void
+}
+
+
+; Verify that stpncpy(D, S, N) calls with N < 2 are transformed to
+; the equivalent of strncpy and either folded to D if N == 0 or to
+; *D ? D + 1 : D otherwise.
+
+define void @fold_stpncpy_s(i8* %dst, i8* %src) {
+; ANY-LABEL: @fold_stpncpy_s(
+; ANY-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; ANY-NEXT:    [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[SRC:%.*]], align 1
+; ANY-NEXT:    store i8 [[STXNCPY_CHAR0]], i8* [[DST]], align 1
+; ANY-NEXT:    [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
+; ANY-NEXT:    [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
+; ANY-NEXT:    [[STPNCPY_SEL:%.*]] = getelementptr i8, i8* [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT:    call void @sink(i8* nonnull [[DST]], i8* [[STPNCPY_SEL]])
+; ANY-NEXT:    ret void
+;
+; Fold stpncpy(D, S, 0) to just D.
+  %es_0 = call i8* @stpncpy(i8* %dst, i8* %src, i64 0)
+  call void @sink(i8* %dst, i8* %es_0)
+
+; Transform stpncpy(D, S, 1) to *D = *S, *D ? D + 1 : D.
+  %es_1 = call i8* @stpncpy(i8* %dst, i8* %src, i64 1)
+  call void @sink(i8* %dst, i8* %es_1)
+
+  ret void
+}
+
+
+; Verify that stpncpy(D, S, N) calls with N >= 2 are not transformed.
+; In theory they could be transformed to the equivalent of the following
+; though it's not clear that it would be a win:
+;   P = memccpy(D, S, 0, N)
+;   N' = P ? N - (P - D) : 0
+;   Q = P ? P : D + N
+;   memset(Q, 0, N')
+;   Q
+; Also verify that the arguments of the call are annotated with the right
+; attributes.
+
+define void @call_stpncpy_s(i8* %dst, i8* %src, i64 %n) {
+; ANY-LABEL: @call_stpncpy_s(
+; ANY-NEXT:    [[ES_2:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[SRC:%.*]], i64 2)
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[ES_2]])
+; ANY-NEXT:    [[ES_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* [[SRC]], i64 [[N:%.*]])
+; ANY-NEXT:    call void @sink(i8* [[DST]], i8* [[ES_N]])
+; ANY-NEXT:    ret void
+;
+; Do not transform stpncpy(D, S, 2).  Both *D and *S must be dereferenceable
+; but neither D[1] nor S[1] need be.
+  %es_2 = call i8* @stpncpy(i8* %dst, i8* %src, i64 2)
+  call void @sink(i8* %dst, i8* %es_2)
+
+; Do not transform stpncpy(D, S, N).  Both D and S must be nonnull but
+; neither *D nor *S need be dereferenceable.
+; TODO: Both D and S should be annotated nonnull and noundef regardless
+; of the value of N.  See https://reviews.llvm.org/D124633.
+  %es_n = call i8* @stpncpy(i8* %dst, i8* %src, i64 %n)
+  call void @sink(i8* %dst, i8* %es_n)
+
+  ret void
+}
+;.
+; ANY: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly }
+; ANY: attributes #[[ATTR1:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn }
+;.

diff  --git a/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll b/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
index 744ade0d28f9e..f70fd517da6ed 100644
--- a/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
@@ -60,12 +60,11 @@ entry:
   ret void
 }
 
-; Note: stpncpy is not handled by SimplifyLibcalls yet, so this should not be changed.
 define void @test_stpncpy_to_memcpy(i8 addrspace(200)* %dst) addrspace(200) nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@test_stpncpy_to_memcpy
 ; CHECK-SAME: (i8 addrspace(200)* [[DST:%.*]]) addrspace(200) #[[ATTR1]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = call addrspace(200) i8 addrspace(200)* @stpncpy(i8 addrspace(200)* [[DST]], i8 addrspace(200)* getelementptr inbounds ([17 x i8], [17 x i8] addrspace(200)* @str, i64 0, i64 0), i64 17)
+; CHECK-NEXT:    call addrspace(200) void @llvm.memcpy.p200i8.p200i8.i128(i8 addrspace(200)* noundef align 1 dereferenceable(17) [[DST]], i8 addrspace(200)* noundef align 1 dereferenceable(17) getelementptr inbounds ([17 x i8], [17 x i8] addrspace(200)* @str, i64 0, i64 0), i128 17, i1 false)
 ; CHECK-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/InstCombine/strncpy-1.ll b/llvm/test/Transforms/InstCombine/strncpy-1.ll
index a935c431bca5b..78d2fdc0f7c9e 100644
--- a/llvm/test/Transforms/InstCombine/strncpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/strncpy-1.ll
@@ -107,8 +107,14 @@ define void @test_simplify6(i8* %dst) {
 
 define void @test_simplify7(i8* %dst, i32 %n) {
 ; CHECK-LABEL: @test_simplify7(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(80) [[DST:%.*]], i8* getelementptr inbounds ([1 x i8], [1 x i8]* @null, i32 0, i32 0), i32 [[N:%.*]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* align 1 dereferenceable(80) [[DST:%.*]], i8 0, i32 [[N:%.*]], i1 false)
 ; CHECK-NEXT:    ret void
+;
+; Unless N is known to be nonzero, strncpy(D, "", N) need not access any
+; bytes in D.
+; TODO: The argument, already annotated dereferenceable, should be
+; annotated noundef and nonnull by the transformation.  See
+; https://reviews.llvm.org/D124633.
 ;
   %src = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
   call i8* @strncpy(i8* dereferenceable(80) %dst, i8* %src, i32 %n)
@@ -117,8 +123,14 @@ define void @test_simplify7(i8* %dst, i32 %n) {
 
 define i8* @test1(i8* %dst, i8* %src, i32 %n) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[RET:%.*]] = call i8* @strncpy(i8* noundef nonnull [[DST:%.*]], i8* nonnull [[SRC:%.*]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[RET:%.*]] = call i8* @strncpy(i8* nonnull [[DST:%.*]], i8* nonnull [[SRC:%.*]], i32 [[N:%.*]])
 ; CHECK-NEXT:    ret i8* [[RET]]
+;
+; Unless N is known to be nonzero, strncpy(D, S, N) need not access any
+; bytes in either D or S.  Verify that the call isn't annotated with
+; the dereferenceable attribute.
+; TODO: Both arguments should be annotated noundef in addition to nonnull.
+; See https://reviews.llvm.org/D124633.
 ;
   %ret = call i8* @strncpy(i8* nonnull %dst, i8* nonnull %src, i32 %n)
   ret i8* %ret
@@ -182,8 +194,8 @@ define void @test_no_simplify2() {
 
 define i8* @test_no_simplify3(i8* %dst, i8* %src, i32 %count) {
 ; CHECK-LABEL: @test_no_simplify3(
-; CHECK-NEXT:    %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
-; CHECK-NEXT:    ret i8* %ret
+; CHECK-NEXT:    [[RET:%.*]] = musttail call i8* @strncpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 32)
+; CHECK-NEXT:    ret i8* [[RET]]
 ;
   %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
   ret i8* %ret
@@ -191,8 +203,8 @@ define i8* @test_no_simplify3(i8* %dst, i8* %src, i32 %count) {
 
 define i8* @test_no_simplify4(i8* %dst, i8* %src, i32 %count) {
 ; CHECK-LABEL: @test_no_simplify4(
-; CHECK-NEXT:    %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
-; CHECK-NEXT:    ret i8* %ret
+; CHECK-NEXT:    [[RET:%.*]] = musttail call i8* @strncpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 6)
+; CHECK-NEXT:    ret i8* [[RET]]
 ;
   %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
   ret i8* %ret

diff  --git a/llvm/test/Transforms/InstCombine/strncpy-4.ll b/llvm/test/Transforms/InstCombine/strncpy-4.ll
new file mode 100644
index 0000000000000..6b9f79b09261e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncpy-4.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;
+; Test that strncpy(D, S, N) calls with the empty string S as a source
+; are simplified for all values of N.
+;
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i8* @strncpy(i8*, i8*, i64)
+
+; A string of length 4 but size 9 to also verify that characters after
+; the nul don't affect the transformation.
+@s4 = constant [9 x i8] c"1234\00567\00"
+
+declare void @sink(i8*, i8*)
+
+
+; Verify that exactly overlapping strncpy(D, D, N) calls are simplified
+; only when N < 2.
+
+define void @fold_strncpy_overlap(i8* %dst, i64 %n) {
+; CHECK-LABEL: @fold_strncpy_overlap(
+; CHECK-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; CHECK-NEXT:    call void @sink(i8* [[DST]], i8* [[DST]])
+; CHECK-NEXT:    ret void
+;
+; Fold strncpy(D, D, 0) to D.
+  %ed_0 = call i8* @strncpy(i8* %dst, i8* %dst, i64 0)
+  call void @sink(i8* %dst, i8* %ed_0)
+
+; Fold strncpy(D, D, 1) to D.
+  %ed_1 = call i8* @strncpy(i8* %dst, i8* %dst, i64 1)
+  call void @sink(i8* %dst, i8* %ed_1)
+
+  ret void
+}
+
+
+; Verify that exactly overlapping strncpy(D, D, N) calls are left alone
+; when N >= 2.
+; Such calls are undefined and although they're benign and could be
+; simplified to
+;   memset(D + strnlen(D, N), 0, N - strnlen(D, N))
+; there is little to gain from it.
+
+define void @call_strncpy_overlap(i8* %dst, i64 %n) {
+; CHECK-LABEL: @call_strncpy_overlap(
+; CHECK-NEXT:    [[ED_2:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 2)
+; CHECK-NEXT:    call void @sink(i8* [[DST]], i8* [[ED_2]])
+; CHECK-NEXT:    [[ED_3:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 3)
+; CHECK-NEXT:    call void @sink(i8* [[DST]], i8* [[ED_3]])
+; CHECK-NEXT:    [[ED_N:%.*]] = call i8* @strncpy(i8* [[DST]], i8* [[DST]], i64 [[N:%.*]])
+; CHECK-NEXT:    call void @sink(i8* [[DST]], i8* [[ED_N]])
+; CHECK-NEXT:    ret void
+;
+
+; Do not transform strncpy(D, D, 2).
+  %ed_2 = call i8* @strncpy(i8* %dst, i8* %dst, i64 2)
+  call void @sink(i8* %dst, i8* %ed_2)
+
+; Do not transform strncpy(D, D, 3).
+  %ed_3 = call i8* @strncpy(i8* %dst, i8* %dst, i64 3)
+  call void @sink(i8* %dst, i8* %ed_3)
+
+; Do not transform strncpy(D, D, N).
+  %ed_n = call i8* @strncpy(i8* %dst, i8* %dst, i64 %n)
+  call void @sink(i8* %dst, i8* %ed_n)
+
+  ret void
+}
+
+
+; Verify that strncpy(D, "", N) calls are transformed to memset(D, 0, N).
+
+define void @fold_strncpy_s0(i8* %dst, i64 %n) {
+; CHECK-LABEL: @fold_strncpy_s0(
+; CHECK-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; CHECK-NEXT:    store i8 0, i8* [[DST]], align 1
+; CHECK-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; CHECK-NEXT:    store i16 0, i16* [[TMP1]], align 1
+; CHECK-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false)
+; CHECK-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false)
+; CHECK-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT:    ret void
+;
+  %ps0 = getelementptr [9 x i8], [9 x i8]* @s4, i32 0, i32 4
+
+; Fold strncpy(D, "", 0) to just D.
+  %es0_0 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 0)
+  call void @sink(i8* %dst, i8* %es0_0)
+
+; Transform strncpy(D, "", 1) to *D = '\0', D.
+  %es0_1 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 1)
+  call void @sink(i8* %dst, i8* %es0_1)
+
+; Transform strncpy(D, "", 2) to memset(D, 0, 2), D.
+  %es0_2 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 2)
+  call void @sink(i8* %dst, i8* %es0_2)
+
+; Transform strncpy(D, "", 9) to memset(D, 0, 9), D.
+  %es0_9 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 9)
+  call void @sink(i8* %dst, i8* %es0_9)
+
+; Transform strncpy(D, "", n) to memset(D, 0, n), D.
+  %es0_n = call i8* @strncpy(i8* %dst, i8* %ps0, i64 %n)
+  call void @sink(i8* %dst, i8* %es0_n)
+
+  ret void
+}
+
+
+; Verify that strncpy(D, S, N) calls with nonconstant source S and constant
+; size are simplified when N < 2.
+
+define void @fold_strncpy_s(i8* %dst, i8* %src, i64 %n) {
+; CHECK-LABEL: @fold_strncpy_s(
+; CHECK-NEXT:    call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; CHECK-NEXT:    [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[SRC:%.*]], align 1
+; CHECK-NEXT:    store i8 [[STXNCPY_CHAR0]], i8* [[DST]], align 1
+; CHECK-NEXT:    call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT:    ret void
+;
+; Fold strncpy(D, S, 0) to just D.
+  %ed_0 = call i8* @strncpy(i8* %dst, i8* %src, i64 0)
+  call void @sink(i8* %dst, i8* %ed_0)
+
+; Transform strncpy(D, S, 1) to *D = *S, D.
+  %ed_1 = call i8* @strncpy(i8* %dst, i8* %src, i64 1)
+  call void @sink(i8* %dst, i8* %ed_1)
+
+  ret void
+}
+
+
+; Verify that strncpy(D, S, N) calls with nonconstant source S and constant
+; size are not transformed when N is either unknown or greater than one.
+; Also verify that the arguments of the call are annotated with the right
+; attributes.
+
+define void @call_strncpy_s(i8* %dst, i8* %src, i64 %n) {
+; CHECK-LABEL: @call_strncpy_s(
+; CHECK-NEXT:    [[ED_2:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[SRC:%.*]], i64 2)
+; CHECK-NEXT:    call void @sink(i8* [[DST]], i8* [[ED_2]])
+; CHECK-NEXT:    [[ED_9:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[SRC]], i64 9)
+; CHECK-NEXT:    call void @sink(i8* [[DST]], i8* [[ED_9]])
+; CHECK-NEXT:    [[ED_N:%.*]] = call i8* @strncpy(i8* [[DST]], i8* [[SRC]], i64 [[N:%.*]])
+; CHECK-NEXT:    call void @sink(i8* [[DST]], i8* [[ED_N]])
+; CHECK-NEXT:    ret void
+;
+; Do not transform strncpy(D, S, 2) when S is unknown.  Both *D and *S must
+; be dereferenceable but neither D[1] nor S[1] need be.
+  %ed_2 = call i8* @strncpy(i8* %dst, i8* %src, i64 2)
+  call void @sink(i8* %dst, i8* %ed_2)
+
+; Do not transform strncpy(D, S, 9) when S is unknown.
+  %ed_9 = call i8* @strncpy(i8* %dst, i8* %src, i64 9)
+  call void @sink(i8* %dst, i8* %ed_9)
+
+; Do not transform strncpy(D, S, N) when all arguments are unknown.  Both
+; D and S must be nonnull but neither *D nor *S need be dereferenceable.
+; TODO: Both D and S should be annotated nonnull and noundef regardless
+; of the value of N.  See https://reviews.llvm.org/D124633.
+  %ed_n = call i8* @strncpy(i8* %dst, i8* %src, i64 %n)
+  call void @sink(i8* %dst, i8* %ed_n)
+
+  ret void
+}


        


More information about the llvm-commits mailing list