[llvm] e80e134 - [InstCombine] Add support for stpncpy folding
Martin Sebor via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 27 13:44:44 PDT 2022
Author: Martin Sebor
Date: 2022-09-27T14:44:33-06:00
New Revision: e80e134c77bb093370a3e4fee41ebe8710e3564d
URL: https://github.com/llvm/llvm-project/commit/e80e134c77bb093370a3e4fee41ebe8710e3564d
DIFF: https://github.com/llvm/llvm-project/commit/e80e134c77bb093370a3e4fee41ebe8710e3564d.diff
LOG: [InstCombine] Add support for stpncpy folding
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D130922
Added:
llvm/test/Transforms/InstCombine/stpncpy-1.ll
llvm/test/Transforms/InstCombine/strncpy-4.ll
Modified:
llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
llvm/test/Transforms/InstCombine/simplify-libcalls.ll
llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
llvm/test/Transforms/InstCombine/strncpy-1.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 2817f2bcfbb9f..a9e26c076c7ec 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -183,6 +183,9 @@ class LibCallSimplifier {
Value *optimizeRealloc(CallInst *CI, IRBuilderBase &B);
Value *optimizeWcslen(CallInst *CI, IRBuilderBase &B);
Value *optimizeBCopy(CallInst *CI, IRBuilderBase &B);
+
+ // Helper to optimize stpncpy and strncpy.
+ Value *optimizeStringNCpy(CallInst *CI, bool RetEnd, IRBuilderBase &B);
// Wrapper for all String/Memory Library Call Optimizations
Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilderBase &B);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 232ec6f586ac9..a463ae3dc542a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -773,40 +773,62 @@ Value *LibCallSimplifier::optimizeStrLCpy(CallInst *CI, IRBuilderBase &B) {
return ConstantInt::get(CI->getType(), SrcLen);
}
-// Optimize a call to strncpy.
-
-Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
+// Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy
+// otherwise.
+Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd,
+ IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
Value *Size = CI->getArgOperand(2);
- annotateNonNullNoUndefBasedOnAccess(CI, 0);
- if (isKnownNonZero(Size, DL))
+
+ if (isKnownNonZero(Size, DL)) {
+ // Both st{p,r}ncpy(D, S, N) access the source and destination arrays
+ // only when N is nonzero.
+ annotateNonNullNoUndefBasedOnAccess(CI, 0);
annotateNonNullNoUndefBasedOnAccess(CI, 1);
+ }
- uint64_t Len;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size))
- Len = LengthArg->getZExtValue();
- else
- return nullptr;
+ // If the "bound" argument is known set N to it. Otherwise set it to
+ // UINT64_MAX and handle it later.
+ uint64_t N = UINT64_MAX;
+ if (ConstantInt *SizeC = dyn_cast<ConstantInt>(Size))
+ N = SizeC->getZExtValue();
- // strncpy(x, y, 0) -> x
- if (Len == 0)
+ if (N == 0)
+ // Fold st{p,r}ncpy(D, S, 0) to D.
return Dst;
- // See if we can get the length of the input string.
+ if (N == 1) {
+ Type *CharTy = B.getInt8Ty();
+ Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0");
+ B.CreateStore(CharVal, Dst);
+ if (!RetEnd)
+ // Transform strncpy(D, S, 1) to return (*D = *S), D.
+ return Dst;
+
+ // Transform stpncpy(D, S, 1) to return (*D = *S) ? D + 1 : D.
+ Value *ZeroChar = ConstantInt::get(CharTy, 0);
+ Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp");
+
+ Value *Off1 = B.getInt32(1);
+ Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end");
+ return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel");
+ }
+
+ // If the length of the input string is known set SrcLen to it.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen) {
+ if (SrcLen)
annotateDereferenceableBytes(CI, 1, SrcLen);
- --SrcLen; // Unbias length.
- } else {
+ else
return nullptr;
- }
+
+ --SrcLen; // Unbias length.
if (SrcLen == 0) {
- // strncpy(x, "", y) -> memset(x, '\0', y)
+ // Transform st{p,r}ncpy(D, "", N) to memset(D, '\0', N) for any N.
Align MemSetAlign =
- CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
+ CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
AttrBuilder ArgAttrs(CI->getContext(), CI->getAttributes().getParamAttrs(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
@@ -815,28 +837,37 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
return Dst;
}
- // strncpy(a, "a", 4) - > memcpy(a, "a\0\0\0", 4)
- if (Len > SrcLen + 1) {
- if (Len <= 128) {
- StringRef Str;
- if (!getConstantStringInfo(Src, Str))
- return nullptr;
- std::string SrcStr = Str.str();
- SrcStr.resize(Len, '\0');
- Src = B.CreateGlobalString(SrcStr, "str");
- } else {
+ if (N > SrcLen + 1) {
+ if (N > 128)
+ // Bail if N is large or unknown.
return nullptr;
- }
+
+ // st{p,r}ncpy(D, "a", N) -> memcpy(D, "a\0\0\0", N) for N <= 128.
+ StringRef Str;
+ if (!getConstantStringInfo(Src, Str))
+ return nullptr;
+ std::string SrcStr = Str.str();
+ // Create a bigger, nul-padded array with the same length, SrcLen,
+ // as the original string.
+ SrcStr.resize(N, '\0');
+ Src = B.CreateGlobalString(SrcStr, "str");
}
Type *PT = Callee->getFunctionType()->getParamType(0);
- // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant]
+ // st{p,r}ncpy(D, S, N) -> memcpy(align 1 D, align 1 S, N) when both
+ // S and N are constant.
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
- ConstantInt::get(DL.getIntPtrType(PT), Len));
+ ConstantInt::get(DL.getIntPtrType(PT), N));
NewCI->setAttributes(CI->getAttributes());
NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
copyFlags(*CI, NewCI);
- return Dst;
+ if (!RetEnd)
+ return Dst;
+
+ // stpncpy(D, S, N) returns the address of the first null in D if it writes
+ // one, otherwise D + N.
+ Value *Off = B.getInt64(std::min(SrcLen, N));
+ return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr");
}
Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B,
@@ -3349,8 +3380,10 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeStpCpy(CI, Builder);
case LibFunc_strlcpy:
return optimizeStrLCpy(CI, Builder);
+ case LibFunc_stpncpy:
+ return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder);
case LibFunc_strncpy:
- return optimizeStrNCpy(CI, Builder);
+ return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder);
case LibFunc_strlen:
return optimizeStrLen(CI, Builder);
case LibFunc_strnlen:
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
index d5553095fdace..0c834a9b3ed60 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls-i16.ll
@@ -204,7 +204,7 @@ declare i16 @strcmp(i8*, i8*) #0
define void @test9(i8* %x) {
; CHECK32-LABEL: @test9(
-; CHECK32-NEXT: [[Y:%.*]] = call i16 @strcmp(i8* [[X:%.*]], i8* [[X]]) #[[ATTR5:[0-9]+]]
+; CHECK32-NEXT: [[Y:%.*]] = call i16 @strcmp(i8* [[X:%.*]], i8* [[X]]) #[[ATTR6:[0-9]+]]
; CHECK32-NEXT: ret void
;
; CHECK16-LABEL: @test9(
@@ -321,13 +321,13 @@ define i4 @strlen(i8* %s) {
ret i4 0
}
-; Test emission of stpncpy.
+; Test emission of stpncpy, including call attributes.
@a = dso_local global [4 x i8] c"123\00"
@b = dso_local global [5 x i8] zeroinitializer
declare i8* @__stpncpy_chk(i8* noundef, i8* noundef, i32 noundef, i32 noundef)
define signext i32 @emit_stpncpy() {
; CHECK-LABEL: @emit_stpncpy(
-; CHECK-NEXT: [[STPNCPY:%.*]] = call i8* @stpncpy(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
+; CHECK-NEXT: [[STPNCPY:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
; CHECK-NEXT: ret i32 0
;
%call = call i8* @__stpncpy_chk(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0),
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
index 2ee085c31ef1c..16ba967b8b8fb 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -338,7 +338,7 @@ define i4 @strlen(i8* %s) {
declare i8* @__stpncpy_chk(i8* noundef, i8* noundef, i32 noundef, i32 noundef)
define signext i32 @emit_stpncpy() {
; CHECK-LABEL: @emit_stpncpy(
-; CHECK-NEXT: [[STPNCPY:%.*]] = call i8* @stpncpy(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
+; CHECK-NEXT: [[STPNCPY:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2)
; CHECK-NEXT: ret i32 0
;
%call = call i8* @__stpncpy_chk(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0),
diff --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
new file mode 100644
index 0000000000000..51e389e9c484e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
@@ -0,0 +1,466 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+;
+; Test that the stpncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -data-layout="E" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,BE
+; RUN: opt < %s -data-layout="e" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,LE
+
+declare i8* @stpncpy(i8*, i8*, i64)
+
+declare void @sink(i8*, i8*)
+
+@a4 = constant [4 x i8] c"1234"
+@s4 = constant [5 x i8] c"1234\00"
+
+
+; The following are generated by the stpncpy -> memcpy transformation
+; (trading space for speed).
+@str = private constant [4 x i8] c"4\00\00\00"
+@str.1 = private constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00"
+@str.2 = private constant [10 x i8] c"1234\00\00\00\00\00\00"
+@str.3 = private unnamed_addr constant [4 x i8] c"4\00\00\00", align 1
+@str.4 = private unnamed_addr constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00", align 1
+@str.5 = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+
+; Verify that the generated constants have the expected contents.
+; ANY: @[[A4:[a-zA-Z0-9_$"\\.-]+]] = constant [4 x i8] c"1234"
+; ANY: @[[S4:[a-zA-Z0-9_$"\\.-]+]] = constant [5 x i8] c"1234\00"
+; ANY: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private constant [4 x i8] c"4\00\00\00"
+; ANY: @[[STR_1:[a-zA-Z0-9_$"\\.-]+]] = private constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00"
+; ANY: @[[STR_2:[a-zA-Z0-9_$"\\.-]+]] = private constant [10 x i8] c"1234\00\00\00\00\00\00"
+; ANY: @[[STR_3:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [4 x i8] c"4\00\00\00", align 1
+; ANY: @[[STR_4:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00", align 1
+; ANY: @[[STR_5:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+; ANY: @[[STR_6:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [4 x i8] c"4\00\00\00", align 1
+; ANY: @[[STR_7:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00", align 1
+; ANY: @[[STR_8:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+; ANY: @[[STR_9:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
+
+; Verify that exactly overlapping stpncpy(D, D, N) calls are transformed
+; to D + strnlen(D, N) or, equivalently, D + (*D != '\0'), when N < 2.
+
+define void @fold_stpncpy_overlap(i8* %dst, i64 %n) {
+; ANY-LABEL: @fold_stpncpy_overlap(
+; ANY-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[DST]], align 1
+; ANY-NEXT: [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
+; ANY-NEXT: [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
+; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, i8* [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* [[STPNCPY_SEL]])
+; ANY-NEXT: ret void
+;
+; Fold stpncpy(D, D, 0) to just D.
+ %es_0 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 0)
+ call void @sink(i8* %dst, i8* %es_0)
+
+; Fold stpncpy(D, D, 1) to D + (*D != '\0').
+ %es_1 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 1)
+ call void @sink(i8* %dst, i8* %es_1)
+
+ ret void
+}
+
+
+; Verify that exactly overlapping stpncpy(D, D, N) calls are left alone
+; when N >= 2. Such calls are strictly undefined and while simplifying
+; them to the expected result is possible there is little to gain from it.
+
+define void @call_stpncpy_overlap(i8* %dst, i64 %n) {
+; ANY-LABEL: @call_stpncpy_overlap(
+; ANY-NEXT: [[ES_2:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 2)
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_2]])
+; ANY-NEXT: [[ES_3:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 3)
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_3]])
+; ANY-NEXT: [[ES_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* [[DST]], i64 [[N:%.*]])
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_N]])
+; ANY-NEXT: ret void
+;
+; Do not transform stpncpy(D, D, 2).
+ %es_2 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 2)
+ call void @sink(i8* %dst, i8* %es_2)
+
+; Do not transform stpncpy(D, D, 3).
+ %es_3 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 3)
+ call void @sink(i8* %dst, i8* %es_3)
+
+; Do not transform stpncpy(D, D, N).
+ %es_n = call i8* @stpncpy(i8* %dst, i8* %dst, i64 %n)
+ call void @sink(i8* %dst, i8* %es_n)
+
+ ret void
+}
+
+
+; Verify that stpncpy(D, "", N) calls are transformed to memset(D, 0, N).
+
+define void @fold_stpncpy_s0(i8* %dst, i64 %n) {
+; ANY-LABEL: @fold_stpncpy_s0(
+; ANY-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; ANY-NEXT: store i8 0, i8* [[DST]], align 1
+; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; ANY-NEXT: store i16 0, i16* [[TMP1]], align 1
+; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false)
+; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false)
+; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT: ret void
+;
+ %ps0 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 4
+
+; Fold stpncpy(D, "", 0) to just D.
+ %es0_0 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 0)
+ call void @sink(i8* %dst, i8* %es0_0)
+
+; Transform stpncpy(D, "", 1) to *D = '\0', D.
+ %es0_1 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 1)
+ call void @sink(i8* %dst, i8* %es0_1)
+
+; Transform stpncpy(D, "", 2) to memset(D, 0, 2), D.
+ %es0_2 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 2)
+ call void @sink(i8* %dst, i8* %es0_2)
+
+; Transform stpncpy(D, "", 9) to memset(D, 0, 9), D.
+ %es0_9 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 9)
+ call void @sink(i8* %dst, i8* %es0_9)
+
+; Transform stpncpy(D, "", n) to memset(D, 0, n), D.
+ %es0_n = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 %n)
+ call void @sink(i8* %dst, i8* %es0_n)
+
+ ret void
+}
+
+
+; Verify that stpncpy(D, "4", N) calls are transformed to the equivalent
+; of strncpy(D, "4", N) and the result folded to D + (N != 0).
+
+define void @fold_stpncpy_s1(i8* %dst) {
+; BE-LABEL: @fold_stpncpy_s1(
+; BE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; BE-NEXT: store i8 52, i8* [[DST]], align 1
+; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; BE-NEXT: store i16 13312, i16* [[TMP1]], align 1
+; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.6, i64 0, i64 0), i64 3, i1 false)
+; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.7, i64 0, i64 0), i64 9, i1 false)
+; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; BE-NEXT: ret void
+;
+; LE-LABEL: @fold_stpncpy_s1(
+; LE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; LE-NEXT: store i8 52, i8* [[DST]], align 1
+; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; LE-NEXT: store i16 52, i16* [[TMP1]], align 1
+; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.6, i64 0, i64 0), i64 3, i1 false)
+; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.7, i64 0, i64 0), i64 9, i1 false)
+; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; LE-NEXT: ret void
+;
+ %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3
+
+; Fold stpncpy(D, "4", 0) to just D.
+ %es1_0 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 0)
+ call void @sink(i8* %dst, i8* %es1_0)
+
+; Transform stpncpy(D, "4", 1) to *D = '4', D + 1.
+ %es1_1 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 1)
+ call void @sink(i8* %dst, i8* %es1_1)
+
+; Transform stpncpy(D, "4", 2) to strncpy(D, "4", 2) + 1.
+ %es1_2 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 2)
+ call void @sink(i8* %dst, i8* %es1_2)
+
+; Transform stpncpy(D, "4", 3) to strncpy(D, "4", 3) + 1, which is then
+; transformed to memcpy(D, "4", 2), D[2] = '\0', D + 1.
+ %es1_3 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 3)
+ call void @sink(i8* %dst, i8* %es1_3)
+
+; Transform stpncpy(D, "4", 9) to strncpy(D, "4", 9) + 1.
+ %es1_9 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 9)
+ call void @sink(i8* %dst, i8* %es1_9)
+
+ ret void
+}
+
+
+; Verify that stpncpy(D, "1234", N) calls are transformed to the equivalent
+; of strncpy(D, "1234", N) and the result folded to D + min(4, N).
+
+define void @fold_stpncpy_s4(i8* %dst, i64 %n) {
+; BE-LABEL: @fold_stpncpy_s4(
+; BE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; BE-NEXT: store i8 49, i8* [[DST]], align 1
+; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; BE-NEXT: store i16 12594, i16* [[TMP1]], align 1
+; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false)
+; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; BE-NEXT: store i32 825373492, i32* [[TMP2]], align 1
+; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.8, i64 0, i64 0), i64 9, i1 false)
+; BE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; BE-NEXT: ret void
+;
+; LE-LABEL: @fold_stpncpy_s4(
+; LE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; LE-NEXT: store i8 49, i8* [[DST]], align 1
+; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; LE-NEXT: store i16 12849, i16* [[TMP1]], align 1
+; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false)
+; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; LE-NEXT: store i32 875770417, i32* [[TMP2]], align 1
+; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.8, i64 0, i64 0), i64 9, i1 false)
+; LE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; LE-NEXT: ret void
+;
+ %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0
+
+; Fold stpncpy(D, "1234", 0) to just D.
+ %es4_0 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 0)
+ call void @sink(i8* %dst, i8* %es4_0)
+
+; Transform stpncpy(D, "1234", 1) to *D = '1', D + 1.
+ %es4_1 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 1)
+ call void @sink(i8* %dst, i8* %es4_1)
+
+; Transform stpncpy(D, "1234", 2) to strncpy(D, "1234", 2) + 2.
+ %es4_2 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 2)
+ call void @sink(i8* %dst, i8* %es4_2)
+
+; Transform stpncpy(D, "1234", 3) to strncpy(D, "1234", 3) + 3
+ %es4_3 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 3)
+ call void @sink(i8* %dst, i8* %es4_3)
+
+; Transform stpncpy(D, "1234", 4) to strncpy(D, "1234", 4) + 4.
+ %es4_4 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 4)
+ call void @sink(i8* %dst, i8* %es4_4)
+
+; Transform stpncpy(D, "1234", 9) to strncpy(D, "1234", 9) + 4.
+ %es4_9 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 9)
+ call void @sink(i8* %dst, i8* %es4_9)
+
+ ret void
+}
+
+
+; Verify that a call to stpncpy(D, A, N) with a constant source larger
+; than one byte is left alone when N is unknown.
+
+define void @call_stpncpy_xx_n(i8* %dst, i64 %n) {
+; ANY-LABEL: @call_stpncpy_xx_n(
+; ANY-NEXT: [[EA1_N:%.*]] = call i8* @stpncpy(i8* [[DST:%.*]], i8* dereferenceable(2) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 3), i64 [[N:%.*]])
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[EA1_N]])
+; ANY-NEXT: [[EA4_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 [[N]])
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[EA4_N]])
+; ANY-NEXT: [[ES1_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* dereferenceable(2) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 3), i64 [[N]])
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES1_N]])
+; ANY-NEXT: [[ES4_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 [[N]])
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES4_N]])
+; ANY-NEXT: ret void
+;
+; Do not transform stpncpy(D, A4 + 3, N) when N is unknown.
+ %pa1 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 3
+ %ea1_n = call i8* @stpncpy(i8* %dst, i8* %pa1, i64 %n)
+ call void @sink(i8* %dst, i8* %ea1_n)
+
+; Do not transform stpncpy(D, A4, N) when N is unknown.
+ %pa4 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 0
+ %ea4_n = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 %n)
+ call void @sink(i8* %dst, i8* %ea4_n)
+
+; Do not transform stpncpy(D, "4", N) when N is unknown.
+ %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3
+ %es1_n = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 %n)
+ call void @sink(i8* %dst, i8* %es1_n)
+
+; Likewise, do not transform stpncpy(D, "1234", N) when N is unknown.
+ %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0
+ %es4_n = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 %n)
+ call void @sink(i8* %dst, i8* %es4_n)
+
+ ret void
+}
+
+; Verify that stpncpy(D, (char[4]){"1234"}, N) calls with an unterminated
+; source array are transformed to the equivalent strncpy call and the result
+; folded to D + min(4, N).
+
+define void @fold_stpncpy_a4(i8* %dst, i64 %n) {
+; BE-LABEL: @fold_stpncpy_a4(
+; BE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; BE-NEXT: store i8 49, i8* [[DST]], align 1
+; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; BE-NEXT: store i16 12594, i16* [[TMP1]], align 1
+; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 3, i1 false)
+; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; BE-NEXT: store i32 825373492, i32* [[TMP2]], align 1
+; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 5, i1 false)
+; BE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.9, i64 0, i64 0), i64 9, i1 false)
+; BE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR4]])
+; BE-NEXT: ret void
+;
+; LE-LABEL: @fold_stpncpy_a4(
+; LE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; LE-NEXT: store i8 49, i8* [[DST]], align 1
+; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
+; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; LE-NEXT: store i16 12849, i16* [[TMP1]], align 1
+; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
+; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 3, i1 false)
+; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
+; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
+; LE-NEXT: store i32 875770417, i32* [[TMP2]], align 1
+; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 5, i1 false)
+; LE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.9, i64 0, i64 0), i64 9, i1 false)
+; LE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
+; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR4]])
+; LE-NEXT: ret void
+;
+
+ %pa4 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 0
+
+; Fold stpncpy(D, A4, 0) to just D.
+ %ea4_0 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 0)
+ call void @sink(i8* %dst, i8* %ea4_0)
+
+; Transform stpncpy(D, A4, 1) to *D = '1', D + 1.
+ %ea4_1 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 1)
+ call void @sink(i8* %dst, i8* %ea4_1)
+
+; Transform stpncpy(D, A4, 2) to strncpy(D, A4, 2) + 2.
+ %ea4_2 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 2)
+ call void @sink(i8* %dst, i8* %ea4_2)
+
+; Transform stpncpy(D, A4, 3) to strncpy(D, A4, 3) + 3
+ %ea4_3 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 3)
+ call void @sink(i8* %dst, i8* %ea4_3)
+
+; Transform stpncpy(D, A4, 4) to strncpy(D, A4, 4) + 4.
+ %ea4_4 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 4)
+ call void @sink(i8* %dst, i8* %ea4_4)
+
+; Transform stpncpy(D, A4, 5) to strncpy(D, A4, 5) + 4.
+ %ea4_5 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 5)
+ call void @sink(i8* %dst, i8* %ea4_5)
+
+; Transform stpncpy(D, A4, 9) to strncpy(D, A4, 9) + 4.
+ %ea4_9 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 9)
+ call void @sink(i8* %dst, i8* %ea4_9)
+
+ ret void
+}
+
+
+; Verify that stpncpy(D, S, N) calls with N < 2 are transformed to
+; the equivalent of strncpy and either folded to D if N == 0 or to
+; *D ? D + 1 : D otherwise.
+
+define void @fold_stpncpy_s(i8* %dst, i8* %src) {
+; ANY-LABEL: @fold_stpncpy_s(
+; ANY-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[SRC:%.*]], align 1
+; ANY-NEXT: store i8 [[STXNCPY_CHAR0]], i8* [[DST]], align 1
+; ANY-NEXT: [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
+; ANY-NEXT: [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
+; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, i8* [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* [[STPNCPY_SEL]])
+; ANY-NEXT: ret void
+;
+; Fold stpncpy(D, S, 0) to just D.
+ %es_0 = call i8* @stpncpy(i8* %dst, i8* %src, i64 0)
+ call void @sink(i8* %dst, i8* %es_0)
+
+; Transform stpncpy(D, S, 1) to *D = *S, *D ? D + 1 : D.
+ %es_1 = call i8* @stpncpy(i8* %dst, i8* %src, i64 1)
+ call void @sink(i8* %dst, i8* %es_1)
+
+ ret void
+}
+
+
+; Verify that stpncpy(D, S, N) calls with N >= 2 are not transformed.
+; In theory they could be transformed to the equivalent of the following
+; though it's not clear that it would be a win:
+; P = memccpy(D, S, 0, N)
+; N' = P ? N - (P - D) : 0
+; Q = P ? P : D + N
+; memset(Q, 0, N')
+; Q
+; Also verify that the arguments of the call are annotated with the right
+; attributes.
+
+define void @call_stpncpy_s(i8* %dst, i8* %src, i64 %n) {
+; ANY-LABEL: @call_stpncpy_s(
+; ANY-NEXT: [[ES_2:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[SRC:%.*]], i64 2)
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_2]])
+; ANY-NEXT: [[ES_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* [[SRC]], i64 [[N:%.*]])
+; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_N]])
+; ANY-NEXT: ret void
+;
+; Do not transform stpncpy(D, S, 2). Both *D and *S must be dereferenceable
+; but neither D[1] nor S[1] need be.
+ %es_2 = call i8* @stpncpy(i8* %dst, i8* %src, i64 2)
+ call void @sink(i8* %dst, i8* %es_2)
+
+; Do not transform stpncpy(D, S, N). Both D and S must be nonnull but
+; neither *D nor *S need be dereferenceable.
+; TODO: Both D and S should be annotated nonnull and noundef regardless
+; of the value of N. See https://reviews.llvm.org/D124633.
+ %es_n = call i8* @stpncpy(i8* %dst, i8* %src, i64 %n)
+ call void @sink(i8* %dst, i8* %es_n)
+
+ ret void
+}
+;.
+; ANY: attributes #[[ATTR0:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn writeonly }
+; ANY: attributes #[[ATTR1:[0-9]+]] = { argmemonly nocallback nofree nounwind willreturn }
+;.
diff --git a/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll b/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
index 744ade0d28f9e..f70fd517da6ed 100644
--- a/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
+++ b/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll
@@ -60,12 +60,11 @@ entry:
ret void
}
-; Note: stpncpy is not handled by SimplifyLibcalls yet, so this should not be changed.
define void @test_stpncpy_to_memcpy(i8 addrspace(200)* %dst) addrspace(200) nounwind {
; CHECK-LABEL: define {{[^@]+}}@test_stpncpy_to_memcpy
; CHECK-SAME: (i8 addrspace(200)* [[DST:%.*]]) addrspace(200) #[[ATTR1]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call addrspace(200) i8 addrspace(200)* @stpncpy(i8 addrspace(200)* [[DST]], i8 addrspace(200)* getelementptr inbounds ([17 x i8], [17 x i8] addrspace(200)* @str, i64 0, i64 0), i64 17)
+; CHECK-NEXT: call addrspace(200) void @llvm.memcpy.p200i8.p200i8.i128(i8 addrspace(200)* noundef align 1 dereferenceable(17) [[DST]], i8 addrspace(200)* noundef align 1 dereferenceable(17) getelementptr inbounds ([17 x i8], [17 x i8] addrspace(200)* @str, i64 0, i64 0), i128 17, i1 false)
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/strncpy-1.ll b/llvm/test/Transforms/InstCombine/strncpy-1.ll
index a935c431bca5b..78d2fdc0f7c9e 100644
--- a/llvm/test/Transforms/InstCombine/strncpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/strncpy-1.ll
@@ -107,8 +107,14 @@ define void @test_simplify6(i8* %dst) {
define void @test_simplify7(i8* %dst, i32 %n) {
; CHECK-LABEL: @test_simplify7(
-; CHECK-NEXT: [[TMP1:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(80) [[DST:%.*]], i8* getelementptr inbounds ([1 x i8], [1 x i8]* @null, i32 0, i32 0), i32 [[N:%.*]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 dereferenceable(80) [[DST:%.*]], i8 0, i32 [[N:%.*]], i1 false)
; CHECK-NEXT: ret void
+;
+; Unless N is known to be nonzero, strncpy(D, "", N) need not access any
+; bytes in D.
+; TODO: The argument, already annotated dereferenceable, should be
+; annotated noundef and nonnull by the transformation. See
+; https://reviews.llvm.org/D124633.
;
%src = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
call i8* @strncpy(i8* dereferenceable(80) %dst, i8* %src, i32 %n)
@@ -117,8 +123,14 @@ define void @test_simplify7(i8* %dst, i32 %n) {
define i8* @test1(i8* %dst, i8* %src, i32 %n) {
; CHECK-LABEL: @test1(
-; CHECK-NEXT: [[RET:%.*]] = call i8* @strncpy(i8* noundef nonnull [[DST:%.*]], i8* nonnull [[SRC:%.*]], i32 [[N:%.*]])
+; CHECK-NEXT: [[RET:%.*]] = call i8* @strncpy(i8* nonnull [[DST:%.*]], i8* nonnull [[SRC:%.*]], i32 [[N:%.*]])
; CHECK-NEXT: ret i8* [[RET]]
+;
+; Unless N is known to be nonzero, strncpy(D, S, N) need not access any
+; bytes in either D or S. Verify that the call isn't annotated with
+; the dereferenceable attribute.
+; TODO: Both arguments should be annotated noundef in addition to nonnull.
+; See https://reviews.llvm.org/D124633.
;
%ret = call i8* @strncpy(i8* nonnull %dst, i8* nonnull %src, i32 %n)
ret i8* %ret
@@ -182,8 +194,8 @@ define void @test_no_simplify2() {
define i8* @test_no_simplify3(i8* %dst, i8* %src, i32 %count) {
; CHECK-LABEL: @test_no_simplify3(
-; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
-; CHECK-NEXT: ret i8* %ret
+; CHECK-NEXT: [[RET:%.*]] = musttail call i8* @strncpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 32)
+; CHECK-NEXT: ret i8* [[RET]]
;
%ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32)
ret i8* %ret
@@ -191,8 +203,8 @@ define i8* @test_no_simplify3(i8* %dst, i8* %src, i32 %count) {
define i8* @test_no_simplify4(i8* %dst, i8* %src, i32 %count) {
; CHECK-LABEL: @test_no_simplify4(
-; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
-; CHECK-NEXT: ret i8* %ret
+; CHECK-NEXT: [[RET:%.*]] = musttail call i8* @strncpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 6)
+; CHECK-NEXT: ret i8* [[RET]]
;
%ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6)
ret i8* %ret
diff --git a/llvm/test/Transforms/InstCombine/strncpy-4.ll b/llvm/test/Transforms/InstCombine/strncpy-4.ll
new file mode 100644
index 0000000000000..6b9f79b09261e
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strncpy-4.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;
+; Test that strncpy(D, S, N) calls with the empty string S as a source
+; are simplified for all values of N.
+;
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i8* @strncpy(i8*, i8*, i64)
+
+; A string of length 4 but size 9 to also verify that characters after
+; the nul don't affect the transformation.
+@s4 = constant [9 x i8] c"1234\00567\00"
+
+declare void @sink(i8*, i8*)
+
+
+; Verify that exactly overlapping strncpy(D, D, N) calls are simplified
+; only when N < 2.
+
+define void @fold_strncpy_overlap(i8* %dst, i64 %n) {
+; CHECK-LABEL: @fold_strncpy_overlap(
+; CHECK-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; CHECK-NEXT: call void @sink(i8* [[DST]], i8* [[DST]])
+; CHECK-NEXT: ret void
+;
+; Fold strncpy(D, D, 0) to D.
+ %ed_0 = call i8* @strncpy(i8* %dst, i8* %dst, i64 0)
+ call void @sink(i8* %dst, i8* %ed_0)
+
+; Fold strncpy(D, D, 1) to D.
+ %ed_1 = call i8* @strncpy(i8* %dst, i8* %dst, i64 1)
+ call void @sink(i8* %dst, i8* %ed_1)
+
+ ret void
+}
+
+
+; Verify that exactly overlapping strncpy(D, D, N) calls are left alone
+; when N >= 2.
+; Such calls are undefined and although they're benign and could be
+; simplified to
+; memset(D + strnlen(D, N), 0, N - strnlen(D, N))
+; there is little to gain from it.
+
+define void @call_strncpy_overlap(i8* %dst, i64 %n) {
+; CHECK-LABEL: @call_strncpy_overlap(
+; CHECK-NEXT: [[ED_2:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 2)
+; CHECK-NEXT: call void @sink(i8* [[DST]], i8* [[ED_2]])
+; CHECK-NEXT: [[ED_3:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 3)
+; CHECK-NEXT: call void @sink(i8* [[DST]], i8* [[ED_3]])
+; CHECK-NEXT: [[ED_N:%.*]] = call i8* @strncpy(i8* [[DST]], i8* [[DST]], i64 [[N:%.*]])
+; CHECK-NEXT: call void @sink(i8* [[DST]], i8* [[ED_N]])
+; CHECK-NEXT: ret void
+;
+
+; Do not transform strncpy(D, D, 2).
+ %ed_2 = call i8* @strncpy(i8* %dst, i8* %dst, i64 2)
+ call void @sink(i8* %dst, i8* %ed_2)
+
+; Do not transform strncpy(D, D, 3).
+ %ed_3 = call i8* @strncpy(i8* %dst, i8* %dst, i64 3)
+ call void @sink(i8* %dst, i8* %ed_3)
+
+; Do not transform strncpy(D, D, N).
+ %ed_n = call i8* @strncpy(i8* %dst, i8* %dst, i64 %n)
+ call void @sink(i8* %dst, i8* %ed_n)
+
+ ret void
+}
+
+
+; Verify that strncpy(D, "", N) calls are transformed to memset(D, 0, N).
+
+define void @fold_strncpy_s0(i8* %dst, i64 %n) {
+; CHECK-LABEL: @fold_strncpy_s0(
+; CHECK-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; CHECK-NEXT: store i8 0, i8* [[DST]], align 1
+; CHECK-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
+; CHECK-NEXT: store i16 0, i16* [[TMP1]], align 1
+; CHECK-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false)
+; CHECK-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false)
+; CHECK-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT: ret void
+;
+ %ps0 = getelementptr [9 x i8], [9 x i8]* @s4, i32 0, i32 4
+
+; Fold strncpy(D, "", 0) to just D.
+ %es0_0 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 0)
+ call void @sink(i8* %dst, i8* %es0_0)
+
+; Transform strncpy(D, "", 1) to *D = '\0', D.
+ %es0_1 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 1)
+ call void @sink(i8* %dst, i8* %es0_1)
+
+; Transform strncpy(D, "", 2) to memset(D, 0, 2), D.
+ %es0_2 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 2)
+ call void @sink(i8* %dst, i8* %es0_2)
+
+; Transform strncpy(D, "", 9) to memset(D, 0, 9), D.
+ %es0_9 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 9)
+ call void @sink(i8* %dst, i8* %es0_9)
+
+; Transform strncpy(D, "", n) to memset(D, 0, n), D.
+ %es0_n = call i8* @strncpy(i8* %dst, i8* %ps0, i64 %n)
+ call void @sink(i8* %dst, i8* %es0_n)
+
+ ret void
+}
+
+
+; Verify that strncpy(D, S, N) calls with nonconstant source S and constant
+; size are simplified when N < 2.
+
+define void @fold_strncpy_s(i8* %dst, i8* %src, i64 %n) {
+; CHECK-LABEL: @fold_strncpy_s(
+; CHECK-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
+; CHECK-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[SRC:%.*]], align 1
+; CHECK-NEXT: store i8 [[STXNCPY_CHAR0]], i8* [[DST]], align 1
+; CHECK-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; CHECK-NEXT: ret void
+;
+; Fold strncpy(D, S, 0) to just D.
+ %ed_0 = call i8* @strncpy(i8* %dst, i8* %src, i64 0)
+ call void @sink(i8* %dst, i8* %ed_0)
+
+; Transform strncpy(D, S, 1) to *D = *S, D.
+ %ed_1 = call i8* @strncpy(i8* %dst, i8* %src, i64 1)
+ call void @sink(i8* %dst, i8* %ed_1)
+
+ ret void
+}
+
+
+; Verify that strncpy(D, S, N) calls with nonconstant source S and constant
+; size are not transformed when N is either unknown or greater than one.
+; Also verify that the arguments of the call are annotated with the right
+; attributes.
+
+define void @call_strncpy_s(i8* %dst, i8* %src, i64 %n) {
+; CHECK-LABEL: @call_strncpy_s(
+; CHECK-NEXT: [[ED_2:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[SRC:%.*]], i64 2)
+; CHECK-NEXT: call void @sink(i8* [[DST]], i8* [[ED_2]])
+; CHECK-NEXT: [[ED_9:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[SRC]], i64 9)
+; CHECK-NEXT: call void @sink(i8* [[DST]], i8* [[ED_9]])
+; CHECK-NEXT: [[ED_N:%.*]] = call i8* @strncpy(i8* [[DST]], i8* [[SRC]], i64 [[N:%.*]])
+; CHECK-NEXT: call void @sink(i8* [[DST]], i8* [[ED_N]])
+; CHECK-NEXT: ret void
+;
+; Do not transform strncpy(D, S, 2) when S is unknown. Both *D and *S must
+; be dereferenceable but neither D[1] nor S[1] need be.
+ %ed_2 = call i8* @strncpy(i8* %dst, i8* %src, i64 2)
+ call void @sink(i8* %dst, i8* %ed_2)
+
+; Do not transform strncpy(D, S, 9) when S is unknown.
+ %ed_9 = call i8* @strncpy(i8* %dst, i8* %src, i64 9)
+ call void @sink(i8* %dst, i8* %ed_9)
+
+; Do not transform strncpy(D, S, N) when all arguments are unknown. Both
+; D and S must be nonnull but neither *D nor *S need be dereferenceable.
+; TODO: Both D and S should be annotated nonnull and noundef regardless
+; of the value of N. See https://reviews.llvm.org/D124633.
+ %ed_n = call i8* @strncpy(i8* %dst, i8* %src, i64 %n)
+ call void @sink(i8* %dst, i8* %ed_n)
+
+ ret void
+}
More information about the llvm-commits
mailing list