[llvm] e263a76 - [InstCombine] Look through more casts when folding memchr and memcmp

Martin Sebor via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 28 14:59:57 PDT 2022


Author: Martin Sebor
Date: 2022-06-28T15:58:42-06:00
New Revision: e263a7670e28d880ec45971f91fa88de01fc51e1

URL: https://github.com/llvm/llvm-project/commit/e263a7670e28d880ec45971f91fa88de01fc51e1
DIFF: https://github.com/llvm/llvm-project/commit/e263a7670e28d880ec45971f91fa88de01fc51e1.diff

LOG: [InstCombine] Look through more casts when folding memchr and memcmp

Enhance getConstantDataArrayInfo to let the memchr and memcmp library
call folders look through arbitrarily long sequences of bitcast and
GEP instructions.
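
For example, the memchr argument below is built from three GEPs interleaved
with bitcasts (a condensed copy of the fold_memchr_gep_gep_gep test added in
memchr-9.ll below); with this change the call folds to a constant expression
instead of remaining a library call:

  @ai64 = constant [2 x i64] [i64 0, i64 -1]

  define i8* @fold_memchr_gep_gep_gep() {
    %p8_1 = getelementptr [2 x i64], [2 x i64]* @ai64, i64 0, i64 1
    %p4_0 = bitcast i64* %p8_1 to i32*
    %p4_1 = getelementptr i32, i32* %p4_0, i64 1
    %p2_0 = bitcast i32* %p4_1 to i16*
    %p2_1 = getelementptr i16, i16* %p2_0, i64 1
    %q2_1 = bitcast i16* %p2_1 to i8*
    ; %q2_1 resolves to @ai64 plus a constant byte offset of 14, so the
    ; memchr over the two trailing 0xff bytes is folded to a constant GEP.
    %pc = call i8* @memchr(i8* %q2_1, i32 -1, i64 2)
    ret i8* %pc
  }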

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D128364

Added: 
    llvm/test/Transforms/InstCombine/memchr-10.ll
    llvm/test/Transforms/InstCombine/memchr-9.ll
    llvm/test/Transforms/InstCombine/memcmp-7.ll
    llvm/test/Transforms/InstCombine/memcmp-8.ll
    llvm/test/Transforms/InstCombine/memrchr-7.ll
    llvm/test/Transforms/InstCombine/strcall-no-nul.ll
    llvm/test/Transforms/InstCombine/strlen-9.ll

Modified: 
    llvm/lib/Analysis/ValueTracking.cpp
    llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
    llvm/test/Transforms/InstCombine/str-int-3.ll
    llvm/test/Transforms/InstCombine/strnlen-1.ll
    llvm/test/Transforms/InstCombine/wcslen-1.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 801167fe281af..05d5e47bb8d74 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4187,45 +4187,30 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
                                     unsigned ElementSize, uint64_t Offset) {
   assert(V);
 
-  // Look through bitcast instructions and geps.
-  V = V->stripPointerCasts();
-
-  // If the value is a GEP instruction or constant expression, treat it as an
-  // offset.
-  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
-    // Fail if the first GEP operand is not a constant zero and we're
-    // not indexing into the initializer.
-    const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
-    if (!FirstIdx || !FirstIdx->isZero())
-      return false;
+  // Drill down into the pointer expression V, ignoring any intervening
+  // casts, and determine the identity of the object it references along
+  // with the cumulative byte offset into it.
+  const GlobalVariable *GV =
+    dyn_cast<GlobalVariable>(getUnderlyingObject(V));
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    // Fail if V is not based on a constant global object.
+    return false;
 
-    Value *Op0 = GEP->getOperand(0);
-    const GlobalVariable *GV = dyn_cast<GlobalVariable>(Op0);
-    if (!GV)
-      return false;
+  const DataLayout &DL = GV->getParent()->getDataLayout();
+  APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
 
-    // Fail if the offset into the initializer is not constant.
-    const DataLayout &DL = GV->getParent()->getDataLayout();
-    APInt Off(DL.getIndexSizeInBits(GEP->getPointerAddressSpace()), 0);
-    if (!GEP->accumulateConstantOffset(DL, Off))
-      return false;
+  if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
+                                                 /*AllowNonInbounds*/ true))
+    // Fail if a constant offset could not be determined.
+    return false;
 
+  uint64_t StartIdx = Off.getLimitedValue();
+  if (StartIdx == UINT64_MAX)
     // Fail if the constant offset is excessive.
-    uint64_t StartIdx = Off.getLimitedValue();
-    if (StartIdx == UINT64_MAX)
-      return false;
-
-    return getConstantDataArrayInfo(Op0, Slice, ElementSize, StartIdx + Offset);
-  }
-
-  // The GEP instruction, constant or instruction, must reference a global
-  // variable that is a constant and is initialized. The referenced constant
-  // initializer is the array that we'll use for optimization.
-  const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
-  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
     return false;
 
-  const DataLayout &DL = GV->getParent()->getDataLayout();
+  Offset += StartIdx;
+
   ConstantDataArray *Array = nullptr;
   ArrayType *ArrayTy = nullptr;
 
@@ -4233,14 +4218,14 @@ bool llvm::getConstantDataArrayInfo(const Value *V,
     Type *GVTy = GV->getValueType();
     uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize();
     uint64_t Length = SizeInBytes / (ElementSize / 8);
-    if (Length <= Offset)
-      // Bail on undersized constants to let sanitizers detect library
-      // calls with them as arguments.
-      return false;
 
     Slice.Array = nullptr;
     Slice.Offset = 0;
-    Slice.Length = Length - Offset;
+    // Return an empty Slice for undersized constants to let callers
+    // transform even undefined library calls into simpler, well-defined
+    // expressions.  This is preferable to making the calls although it
+    // prevents sanitizers from detecting such calls.
+    Slice.Length = Length < Offset ? 0 : Length - Offset;
     return true;
   }
 
@@ -4292,6 +4277,12 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
 
   if (Slice.Array == nullptr) {
     if (TrimAtNul) {
+      // Return a nul-terminated string even for an empty Slice.  This is
+      // safe because all existing SimplifyLibcalls callers require string
+      // arguments and the behavior of the functions they fold is undefined
+      // otherwise.  Folding the calls this way is preferable to making
+      // the undefined library calls, even though it prevents sanitizers
+      // from reporting such calls.
       Str = StringRef();
       return true;
     }
@@ -4371,9 +4362,13 @@ static uint64_t GetStringLengthH(const Value *V,
     return 0;
 
   if (Slice.Array == nullptr)
+    // Zeroinitializer (including an empty one).
     return 1;
 
-  // Search for nul characters
+  // Search for the first nul character.  Return a conservative result even
+  // when there is no nul.  This is safe since otherwise the call to the
+  // string function being folded, such as strlen, is undefined, and folding
+  // can be preferable to making the undefined library call.
   unsigned NullIndex = 0;
   for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
     if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b10a4146ef91e..83dae25dfd5e2 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -945,6 +945,12 @@ Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
   if (!getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
     return nullptr;
 
+  if (Str.size() == 0)
+    // If the array is empty, fold memrchr(A, C, N) to null for any value
+    // of C and N on the basis that the only valid value of N is zero
+    // (otherwise the call is undefined).
+    return NullPtr;
+
   uint64_t EndOff = UINT64_MAX;
   if (LenC) {
     EndOff = LenC->getZExtValue();
@@ -1046,6 +1052,12 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
     return B.CreateSelect(Cmp, NullPtr, SrcPlus);
   }
 
+  if (Str.size() == 0)
+    // If the array is empty, fold memchr(A, C, N) to null for any value
+    // of C and N on the basis that the only valid value of N is zero
+    // (otherwise the call is undefined).
+    return NullPtr;
+
   if (LenC)
     Str = substr(Str, LenC->getZExtValue());
 

diff --git a/llvm/test/Transforms/InstCombine/memchr-10.ll b/llvm/test/Transforms/InstCombine/memchr-10.ll
new file mode 100644
index 0000000000000..9d46e5159107b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memchr-10.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the results of memchr calls with past-the-end pointers used
+; in equality expressions don't cause trouble and either are folded when
+; they might be valid or not when they're provably undefined.
+
+declare i8* @memchr(i8*, i32, i64)
+
+
+@a5 = constant [5 x i8] c"12345"
+
+
+; Fold memchr(a5 + 5, c, 1) == a5 + 5 to an arbitrary constant.
+; The call is transformed to a5[5] == c by the memchr simplifier, with
+; a5[5] being indeterminate.  The equality is then folded with
+; an undefined/arbitrary result.
+
+define i1 @call_memchr_ap5_c_1_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memchr_ap5_c_1_eq_a(
+; CHECK-NEXT:    ret i1
+;
+  %pap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %qap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 1, i32 0
+  %q = call i8* @memchr(i8* %pap5, i32 %c, i64 1)
+  %cmp = icmp eq i8* %q, %qap5
+  ret i1 %cmp
+}
+
+
+; Fold memchr(a5 + 5, c, 5) == a5 + 5 to an arbitrary constant.
+
+define i1 @call_memchr_ap5_c_5_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memchr_ap5_c_5_eq_a(
+; CHECK-NEXT:    ret i1
+;
+  %pap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %qap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 1, i32 0
+  %q = call i8* @memchr(i8* %pap5, i32 %c, i64 5)
+  %cmp = icmp eq i8* %q, %qap5
+  ret i1 %cmp
+}
+
+
+; Fold memchr(a5 + 5, c, n) == a5 to false.
+
+define i1 @fold_memchr_ap5_c_n_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memchr_ap5_c_n_eq_a(
+; CHECK-NEXT:    ret i1 false
+;
+  %pa = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %pap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %q = call i8* @memchr(i8* %pap5, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %pa
+  ret i1 %cmp
+}
+
+
+; Fold memchr(a5 + 5, c, n) == null to true on the basis that n must
+; be zero in order for the call to be valid.
+
+define i1 @fold_memchr_ap5_c_n_eqz(i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memchr_ap5_c_n_eqz(
+; CHECK-NEXT:    ret i1 true
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %q = call i8* @memchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, null
+  ret i1 %cmp
+}
+
+
+; Fold memchr(a5 + 5, '\0', n) == null to true on the basis that n must
+; be zero in order for the call to be valid.
+
+define i1 @fold_memchr_a_nul_n_eqz(i64 %n) {
+; CHECK-LABEL: @fold_memchr_a_nul_n_eqz(
+; CHECK-NEXT:    ret i1 true
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %q = call i8* @memchr(i8* %p, i32 0, i64 %n)
+  %cmp = icmp eq i8* %q, null
+  ret i1 %cmp
+}

diff --git a/llvm/test/Transforms/InstCombine/memchr-9.ll b/llvm/test/Transforms/InstCombine/memchr-9.ll
new file mode 100644
index 0000000000000..1e6178e511ac2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memchr-9.ll
@@ -0,0 +1,324 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Verify that calls to memchr with arrays of elements larger than char
+; are folded correctly.
+; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BE-CHECK
+; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LE-CHECK
+;
+; Exercise folding of memchr calls with addition expressions involving
+; pointers into constant arrays of types larger than char and fractional
+; offsets.
+
+declare i8* @memchr(i8*, i32, i64)
+
+%struct.A = type { [2 x i16], [2 x i16] }
+
+; Hex byte representation: 00 00  01 01  02 02  03 03
+@a = constant [1 x %struct.A] [%struct.A { [2 x i16] [i16 0, i16 257], [2 x i16] [i16 514, i16 771] }]
+
+
+define void @fold_memchr_A_pIb_cst_cst(i8** %pchr) {
+; CHECK-LABEL: @fold_memchr_A_pIb_cst_cst(
+; CHECK-NEXT:    store i8* bitcast ([1 x %struct.A]* @a to i8*), i8** [[PCHR:%.*]], align 8
+; CHECK-NEXT:    [[PST_0_1_1:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 1
+; CHECK-NEXT:    store i8* null, i8** [[PST_0_1_1]], align 8
+; CHECK-NEXT:    [[PST_0_4_4:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 2
+; CHECK-NEXT:    store i8* null, i8** [[PST_0_4_4]], align 8
+; CHECK-NEXT:    [[PST_1_0_1:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 3
+; CHECK-NEXT:    store i8* getelementptr (i8, i8* bitcast ([1 x %struct.A]* @a to i8*), i64 1), i8** [[PST_1_0_1]], align 8
+; CHECK-NEXT:    [[PST_1_0_3:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 4
+; CHECK-NEXT:    store i8* getelementptr (i8, i8* bitcast ([1 x %struct.A]* @a to i8*), i64 1), i8** [[PST_1_0_3]], align 8
+; CHECK-NEXT:    [[PST_1_1_1:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 5
+; CHECK-NEXT:    store i8* null, i8** [[PST_1_1_1]], align 8
+; CHECK-NEXT:    [[PST_1_1_2:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 6
+; CHECK-NEXT:    store i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 0, i64 1) to i8*), i8** [[PST_1_1_2]], align 8
+; CHECK-NEXT:    [[PST_1_3_3:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 7
+; CHECK-NEXT:    store i8* null, i8** [[PST_1_3_3]], align 8
+; CHECK-NEXT:    [[PST_1_3_4:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 8
+; CHECK-NEXT:    store i8* null, i8** [[PST_1_3_4]], align 8
+; CHECK-NEXT:    [[PST_1_3_6:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 10
+; CHECK-NEXT:    store i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 1, i64 1) to i8*), i8** [[PST_1_3_6]], align 8
+; CHECK-NEXT:    ret void
+;
+  %pa = getelementptr [1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0
+  %pi8a = bitcast %struct.A* %pa to i8*
+
+  %pi8ap0 = getelementptr i8, i8* %pi8a, i32 0
+
+  ; Fold memchr((char*)a + 0, '\0', 1) to a.
+  %pst_0_0_1 = getelementptr i8*, i8** %pchr, i32 0
+  %chr_0_0_1 = call i8* @memchr(i8* %pi8ap0, i32 0, i64 1)
+  store i8* %chr_0_0_1, i8** %pst_0_0_1
+
+  ; Fold memchr((char*)a + 0, '\01', 1) to null.
+  %pst_0_1_1 = getelementptr i8*, i8** %pchr, i32 1
+  %chr_0_1_1 = call i8* @memchr(i8* %pi8ap0, i32 1, i64 1)
+  store i8* %chr_0_1_1, i8** %pst_0_1_1
+
+  ; Fold memchr((char*)a + 0, '\04', 4) to null.
+  %pst_0_4_4 = getelementptr i8*, i8** %pchr, i32 2
+  %chr_0_4_4 = call i8* @memchr(i8* %pi8ap0, i32 4, i64 4)
+  store i8* %chr_0_4_4, i8** %pst_0_4_4
+
+
+  %pi8ap1 = getelementptr i8, i8* %pi8a, i32 1
+
+  ; Fold memchr((char*)a + 1, '\0', 1) to (char*)a + 1.
+  %pst_1_0_1 = getelementptr i8*, i8** %pchr, i32 3
+  %chr_1_0_1 = call i8* @memchr(i8* %pi8ap1, i32 0, i64 1)
+  store i8* %chr_1_0_1, i8** %pst_1_0_1
+
+  ; Fold memchr((char*)a + 1, '\0', 3) to (char*)a + 1.
+  %pst_1_0_3 = getelementptr i8*, i8** %pchr, i32 4
+  %chr_1_0_3 = call i8* @memchr(i8* %pi8ap1, i32 0, i64 3)
+  store i8* %chr_1_0_3, i8** %pst_1_0_3
+
+  ; Fold memchr((char*)a + 1, '\01', 1) to null.
+  %pst_1_1_1 = getelementptr i8*, i8** %pchr, i32 5
+  %chr_1_1_1 = call i8* @memchr(i8* %pi8ap1, i32 1, i64 1)
+  store i8* %chr_1_1_1, i8** %pst_1_1_1
+
+  ; Fold memchr((char*)a + 1, '\01', 2) to (char*)a + 2.
+  %pst_1_1_2 = getelementptr i8*, i8** %pchr, i32 6
+  %chr_1_1_2 = call i8* @memchr(i8* %pi8ap1, i32 1, i64 2)
+  store i8* %chr_1_1_2, i8** %pst_1_1_2
+
+  ; Fold memchr((char*)a + 1, '\03', 3) to null.
+  %pst_1_3_3 = getelementptr i8*, i8** %pchr, i32 7
+  %chr_1_3_3 = call i8* @memchr(i8* %pi8ap1, i32 3, i64 3)
+  store i8* %chr_1_3_3, i8** %pst_1_3_3
+
+  ; Fold memchr((char*)a + 1, '\03', 4) to null.
+  %pst_1_3_4 = getelementptr i8*, i8** %pchr, i32 8
+  %chr_1_3_4 = call i8* @memchr(i8* %pi8ap1, i32 3, i64 4)
+  store i8* %chr_1_3_4, i8** %pst_1_3_4
+
+  ; Fold memchr((char*)a + 1, '\03', 5) to null.
+  %pst_1_3_5 = getelementptr i8*, i8** %pchr, i32 9
+  %chr_1_3_5 = call i8* @memchr(i8* %pi8ap1, i32 3, i64 5)
+  store i8* %chr_1_3_4, i8** %pst_1_3_4
+
+  ; Fold memchr((char*)a + 1, '\03', 6) to (char*)a + 6.
+  %pst_1_3_6 = getelementptr i8*, i8** %pchr, i32 10
+  %chr_1_3_6 = call i8* @memchr(i8* %pi8ap1, i32 3, i64 6)
+  store i8* %chr_1_3_6, i8** %pst_1_3_6
+
+
+  ret void
+}
+
+
+define void @fold_memchr_A_pIb_cst_N(i64 %N, i8** %pchr) {
+; CHECK-LABEL: @fold_memchr_A_pIb_cst_N(
+; CHECK-NEXT:    [[MEMCHR_CMP:%.*]] = icmp eq i64 [[N:%.*]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[MEMCHR_CMP]], i8* null, i8* bitcast ([1 x %struct.A]* @a to i8*)
+; CHECK-NEXT:    store i8* [[TMP1]], i8** [[PCHR:%.*]], align 8
+; CHECK-NEXT:    [[PST_0_1_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 1
+; CHECK-NEXT:    [[MEMCHR_CMP1:%.*]] = icmp ult i64 [[N]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[MEMCHR_CMP1]], i8* null, i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 0, i64 1) to i8*)
+; CHECK-NEXT:    store i8* [[TMP2]], i8** [[PST_0_1_N]], align 8
+; CHECK-NEXT:    [[PST_0_4_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 2
+; CHECK-NEXT:    store i8* null, i8** [[PST_0_4_N]], align 8
+; CHECK-NEXT:    [[PST_1_0_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 3
+; CHECK-NEXT:    [[MEMCHR_CMP2:%.*]] = icmp eq i64 [[N]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[MEMCHR_CMP2]], i8* null, i8* getelementptr (i8, i8* bitcast ([1 x %struct.A]* @a to i8*), i64 1)
+; CHECK-NEXT:    store i8* [[TMP3]], i8** [[PST_1_0_N]], align 8
+; CHECK-NEXT:    [[PST_1_1_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 4
+; CHECK-NEXT:    [[MEMCHR_CMP3:%.*]] = icmp ult i64 [[N]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[MEMCHR_CMP3]], i8* null, i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 0, i64 1) to i8*)
+; CHECK-NEXT:    store i8* [[TMP4]], i8** [[PST_1_1_N]], align 8
+; CHECK-NEXT:    [[PST_1_2_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 5
+; CHECK-NEXT:    [[MEMCHR_CMP4:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[MEMCHR_CMP4]], i8* null, i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 1, i64 0) to i8*)
+; CHECK-NEXT:    store i8* [[TMP5]], i8** [[PST_1_2_N]], align 8
+; CHECK-NEXT:    [[PST_1_3_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 6
+; CHECK-NEXT:    [[MEMCHR_CMP5:%.*]] = icmp ult i64 [[N]], 6
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[MEMCHR_CMP5]], i8* null, i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 1, i64 1) to i8*)
+; CHECK-NEXT:    store i8* [[TMP6]], i8** [[PST_1_3_N]], align 8
+; CHECK-NEXT:    [[PST_1_4_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 7
+; CHECK-NEXT:    store i8* null, i8** [[PST_1_4_N]], align 8
+; CHECK-NEXT:    [[PST_2_0_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 8
+; CHECK-NEXT:    store i8* null, i8** [[PST_2_0_N]], align 8
+; CHECK-NEXT:    [[PST_2_1_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 9
+; CHECK-NEXT:    [[MEMCHR_CMP6:%.*]] = icmp eq i64 [[N]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[MEMCHR_CMP6]], i8* null, i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 0, i64 1) to i8*)
+; CHECK-NEXT:    store i8* [[TMP7]], i8** [[PST_2_1_N]], align 8
+; CHECK-NEXT:    [[PST_2_2_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 10
+; CHECK-NEXT:    [[MEMCHR_CMP7:%.*]] = icmp ult i64 [[N]], 3
+; CHECK-NEXT:    [[TMP8:%.*]] = select i1 [[MEMCHR_CMP7]], i8* null, i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 1, i64 0) to i8*)
+; CHECK-NEXT:    store i8* [[TMP8]], i8** [[PST_2_2_N]], align 8
+; CHECK-NEXT:    [[PST_2_3_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 11
+; CHECK-NEXT:    [[MEMCHR_CMP8:%.*]] = icmp ult i64 [[N]], 5
+; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[MEMCHR_CMP8]], i8* null, i8* bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0, i32 1, i64 1) to i8*)
+; CHECK-NEXT:    store i8* [[TMP9]], i8** [[PST_2_3_N]], align 8
+; CHECK-NEXT:    [[PST_2_4_N:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 12
+; CHECK-NEXT:    store i8* null, i8** [[PST_2_4_N]], align 8
+; CHECK-NEXT:    ret void
+;
+  %pa = getelementptr [1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0
+  %pi8a = bitcast %struct.A* %pa to i8*
+
+  %pi8ap0 = getelementptr i8, i8* %pi8a, i32 0
+
+  ; Fold memchr((char*)a + 0, '\0', N) to N ? a : null.
+  %pst_0_0_n = getelementptr i8*, i8** %pchr, i32 0
+  %chr_0_0_n = call i8* @memchr(i8* %pi8ap0, i32 0, i64 %N)
+  store i8* %chr_0_0_n, i8** %pst_0_0_n
+
+  ; Fold memchr((char*)a + 0, '\01', N) to N < 3 ? null : (char*)a + 2.
+  %pst_0_1_n = getelementptr i8*, i8** %pchr, i32 1
+  %chr_0_1_n = call i8* @memchr(i8* %pi8ap0, i32 1, i64 %N)
+  store i8* %chr_0_1_n, i8** %pst_0_1_n
+
+  ; Fold memchr((char*)a + 0, '\04', N) to null.
+  %pst_0_4_n = getelementptr i8*, i8** %pchr, i32 2
+  %chr_0_4_n = call i8* @memchr(i8* %pi8ap0, i32 4, i64 %N)
+  store i8* %chr_0_4_n, i8** %pst_0_4_n
+
+
+  %pi8ap1 = getelementptr i8, i8* %pi8a, i32 1
+
+  ; Fold memchr((char*)a + 1, '\0', N) to N ? (char*)a + 1 : null.
+  %pst_1_0_n = getelementptr i8*, i8** %pchr, i32 3
+  %chr_1_0_n = call i8* @memchr(i8* %pi8ap1, i32 0, i64 %N)
+  store i8* %chr_1_0_n, i8** %pst_1_0_n
+
+  ; Fold memchr((char*)a + 1, '\01', N) to N < 2 ? null : (char*)a + 2.
+  %pst_1_1_n = getelementptr i8*, i8** %pchr, i32 4
+  %chr_1_1_n = call i8* @memchr(i8* %pi8ap1, i32 1, i64 %N)
+  store i8* %chr_1_1_n, i8** %pst_1_1_n
+
+  ; Fold memchr((char*)a + 1, '\02', N) to N < 4 ? null : (char*)a + 4.
+  %pst_1_2_n = getelementptr i8*, i8** %pchr, i32 5
+  %chr_1_2_n = call i8* @memchr(i8* %pi8ap1, i32 2, i64 %N)
+  store i8* %chr_1_2_n, i8** %pst_1_2_n
+
+  ; Fold memchr((char*)a + 1, '\03', N) to N < 6 ? null : (char*)a + 6.
+  %pst_1_3_n = getelementptr i8*, i8** %pchr, i32 6
+  %chr_1_3_n = call i8* @memchr(i8* %pi8ap1, i32 3, i64 %N)
+  store i8* %chr_1_3_n, i8** %pst_1_3_n
+
+  ; Fold memchr((char*)a + 1, '\04', N) to null.
+  %pst_1_4_n = getelementptr i8*, i8** %pchr, i32 7
+  %chr_1_4_n = call i8* @memchr(i8* %pi8ap1, i32 4, i64 %N)
+  store i8* %chr_1_4_n, i8** %pst_1_4_n
+
+
+  %pi8ap2 = getelementptr i8, i8* %pi8a, i32 2
+
+  ; Fold memchr((char*)a + 2, '\0', N) to null.
+  %pst_2_0_n = getelementptr i8*, i8** %pchr, i32 8
+  %chr_2_0_n = call i8* @memchr(i8* %pi8ap2, i32 0, i64 %N)
+  store i8* %chr_2_0_n, i8** %pst_2_0_n
+
+  ; Fold memchr((char*)a + 2, '\01', N) to N ? (char*)a + 2 : null.
+  %pst_2_1_n = getelementptr i8*, i8** %pchr, i32 9
+  %chr_2_1_n = call i8* @memchr(i8* %pi8ap2, i32 1, i64 %N)
+  store i8* %chr_2_1_n, i8** %pst_2_1_n
+
+  ; Fold memchr((char*)a + 2, '\02', N) to N < 3 ? null : (char*)a + 4.
+  %pst_2_2_n = getelementptr i8*, i8** %pchr, i32 10
+  %chr_2_2_n = call i8* @memchr(i8* %pi8ap2, i32 2, i64 %N)
+  store i8* %chr_2_2_n, i8** %pst_2_2_n
+
+  ; Fold memchr((char*)a + 2, '\03', N) to N < 5 ? null : (char*)a + 6.
+  %pst_2_3_n = getelementptr i8*, i8** %pchr, i32 11
+  %chr_2_3_n = call i8* @memchr(i8* %pi8ap2, i32 3, i64 %N)
+  store i8* %chr_2_3_n, i8** %pst_2_3_n
+
+  ; Fold memchr((char*)a + 2, '\04', N) to null.
+  %pst_2_4_n = getelementptr i8*, i8** %pchr, i32 12
+  %chr_2_4_n = call i8* @memchr(i8* %pi8ap2, i32 4, i64 %N)
+  store i8* %chr_2_4_n, i8** %pst_2_4_n
+
+  ret void
+}
+
+
+; Verify that calls with out of bounds offsets are not folded.
+
+define void @call_memchr_A_pIb_xs_cst(i8** %pchr) {
+; CHECK-LABEL: @call_memchr_A_pIb_xs_cst(
+; CHECK-NEXT:    [[CHR_1_0_0_2:%.*]] = call i8* @memchr(i8* noundef nonnull dereferenceable(1) bitcast (%struct.A* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 1, i64 0) to i8*), i32 0, i64 2)
+; CHECK-NEXT:    store i8* [[CHR_1_0_0_2]], i8** [[PCHR:%.*]], align 8
+; CHECK-NEXT:    [[PST_1_0_1_2:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 1
+; CHECK-NEXT:    [[CHR_1_0_1_2:%.*]] = call i8* @memchr(i8* noundef nonnull dereferenceable(1) bitcast (%struct.A* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 1, i64 0) to i8*), i32 0, i64 2)
+; CHECK-NEXT:    store i8* [[CHR_1_0_1_2]], i8** [[PST_1_0_1_2]], align 8
+; CHECK-NEXT:    [[PST_0_0_8_2:%.*]] = getelementptr i8*, i8** [[PCHR]], i64 2
+; CHECK-NEXT:    [[CHR_0_0_8_2:%.*]] = call i8* @memchr(i8* noundef nonnull dereferenceable(1) bitcast (i16* getelementptr inbounds ([1 x %struct.A], [1 x %struct.A]* @a, i64 1, i64 0, i32 0, i64 0) to i8*), i32 0, i64 2)
+; CHECK-NEXT:    store i8* [[CHR_0_0_8_2]], i8** [[PST_0_0_8_2]], align 8
+; CHECK-NEXT:    ret void
+;
+; Verify that the call isn't folded when the first GEP index is excessive.
+  %pa1 = getelementptr [1 x %struct.A], [1 x %struct.A]* @a, i64 1, i64 0
+  %pi8a1 = bitcast %struct.A* %pa1 to i8*
+
+  %pi8a1p0 = getelementptr i8, i8* %pi8a1, i32 0
+
+  ; Don't fold memchr((char*)&a[1] + 0, '\0', 2).
+  %pst_1_0_0_2 = getelementptr i8*, i8** %pchr, i32 0
+  %chr_1_0_0_2 = call i8* @memchr(i8* %pi8a1p0, i32 0, i64 2)
+  store i8* %chr_1_0_0_2, i8** %pst_1_0_0_2
+
+  %pi8a1p1 = getelementptr i8, i8* %pi8a1, i32 1
+
+  ; Likewise, don't fold memchr((char*)&a[1] + 1, '\0', 2).
+  %pst_1_0_1_2 = getelementptr i8*, i8** %pchr, i32 1
+  %chr_1_0_1_2 = call i8* @memchr(i8* %pi8a1p0, i32 0, i64 2)
+  store i8* %chr_1_0_1_2, i8** %pst_1_0_1_2
+
+; Verify that the call isn't folded when the first GEP index is in bounds
+; but the byte offset is excessive.
+  %pa0 = getelementptr [1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0
+  %pi8a0 = bitcast %struct.A* %pa0 to i8*
+
+  %pi8a0p8 = getelementptr i8, i8* %pi8a0, i32 8
+
+  ; Don't fold memchr((char*)&a[0] + 8, '\0', 2).
+  %pst_0_0_8_2 = getelementptr i8*, i8** %pchr, i32 2
+  %chr_0_0_8_2 = call i8* @memchr(i8* %pi8a0p8, i32 0, i64 2)
+  store i8* %chr_0_0_8_2, i8** %pst_0_0_8_2
+
+  ret void
+}
+
+
+@ai64 = constant [2 x i64] [i64 0, i64 -1]
+
+; Verify that a memchr call with an argument consisting of three GEPs
+; is folded.
+
+define i8* @fold_memchr_gep_gep_gep() {
+; CHECK-LABEL: @fold_memchr_gep_gep_gep(
+; CHECK-NEXT:    ret i8* bitcast (i16* getelementptr (i16, i16* bitcast (i32* getelementptr (i32, i32* bitcast (i64* getelementptr inbounds ([2 x i64], [2 x i64]* @ai64, i64 0, i64 1) to i32*), i64 1) to i16*), i64 1) to i8*)
+;
+
+  %p8_1 = getelementptr [2 x i64], [2 x i64]* @ai64, i64 0, i64 1
+  %p4_0 = bitcast i64* %p8_1 to i32*
+  %p4_1 = getelementptr i32, i32* %p4_0, i64 1
+
+  %p2_0 = bitcast i32* %p4_1 to i16*
+  %p2_1 = getelementptr i16, i16* %p2_0, i64 1
+  %q2_1 = bitcast i16* %p2_1 to i8*
+
+  %pc = call i8* @memchr(i8* %q2_1, i32 -1, i64 2)
+  ret i8* %pc
+}
+
+
+%union.U = type { [2 x i32] }
+
+@u = constant %union.U { [2 x i32] [i32 286331153, i32 35791394] }
+
+; Verify memchr folding of a union member.
+
+define i8* @fold_memchr_union_member() {
+; CHECK-LABEL: @fold_memchr_union_member(
+; BE-CHECK-NEXT:    ret i8* getelementptr (i8, i8* bitcast (%union.U* @u to i8*), i64 5)
+; LE-CHECK-NEXT:    ret i8* bitcast (i32* getelementptr inbounds (%union.U, %union.U* @u, i64 0, i32 0, i64 1) to i8*)
+;
+  %pu = getelementptr %union.U, %union.U* @u, i64 0
+  %pi8u = bitcast %union.U* %pu to i8*
+  %pi8u_p1 = getelementptr i8, i8* %pi8u, i64 1
+  %pc = call i8* @memchr(i8* %pi8u_p1, i32 34, i64 8)
+  ret i8* %pc
+}

diff --git a/llvm/test/Transforms/InstCombine/memcmp-7.ll b/llvm/test/Transforms/InstCombine/memcmp-7.ll
new file mode 100644
index 0000000000000..fa1c52fd42820
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcmp-7.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Exercise folding of memcmp calls with addition expressions involving
+; pointers into constant arrays of types larger than char and fractional
+; offsets.
+
+declare i32 @memcmp(i8*, i8*, i64)
+
+@i32a = constant [2 x i16] [i16 4386, i16 13124]
+@i32b = constant [2 x i16] [i16 4386, i16 13124]
+
+
+define void @fold_memcmp_i32a_i32b_pIb(i32 %I, i32* %pcmp)
+; CHECK-LABEL: @fold_memcmp_i32a_i32b_pIb(
+; CHECK-NEXT:    store i32 0, i32* [[PCMP:%.*]], align 4
+; CHECK-NEXT:    [[PST_1_1_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1
+; CHECK-NEXT:    store i32 0, i32* [[PST_1_1_2]], align 4
+; CHECK-NEXT:    [[PST_1_1_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2
+; CHECK-NEXT:    store i32 0, i32* [[PST_1_1_3]], align 4
+; CHECK-NEXT:    ret void
+;
+{
+  %pi32a = getelementptr [2 x i16], [2 x i16]* @i32a, i32 0, i32 0
+  %pi32b = getelementptr [2 x i16], [2 x i16]* @i32b, i32 0, i32 0
+
+  %pi8a = bitcast i16* %pi32a to i8*
+  %pi8b = bitcast i16* %pi32b to i8*
+
+  %pi8ap1 = getelementptr i8, i8* %pi8a, i32 1
+  %pi8bp1 = getelementptr i8, i8* %pi8b, i32 1
+
+  %pst_1_1_1 = getelementptr i32, i32* %pcmp, i32 0
+  %cmp_1_1_1 = call i32 @memcmp(i8* %pi8ap1, i8* %pi8ap1, i64 1)
+  store i32 %cmp_1_1_1, i32* %pst_1_1_1
+
+  %pst_1_1_2 = getelementptr i32, i32* %pcmp, i32 1
+  %cmp_1_1_2 = call i32 @memcmp(i8* %pi8ap1, i8* %pi8ap1, i64 2)
+  store i32 %cmp_1_1_2, i32* %pst_1_1_2
+
+  %pst_1_1_3 = getelementptr i32, i32* %pcmp, i32 2
+  %cmp_1_1_3 = call i32 @memcmp(i8* %pi8ap1, i8* %pi8ap1, i64 3)
+  store i32 %cmp_1_1_3, i32* %pst_1_1_3
+
+  ret void
+}
+
+
+%struct.A = type { [4 x i8] }
+%struct.B = type { [2 x i8], [2 x i8] }
+
+@a = constant [1 x %struct.A] [%struct.A { [4 x i8] [i8 1, i8 2, i8 3, i8 4] }]
+@b = constant [1 x %struct.B] [%struct.B { [2 x i8] [i8 1, i8 2], [2 x i8] [i8 3, i8 4]}]
+
+define void @fold_memcmp_A_B_pIb(i32 %I, i32* %pcmp) {
+; CHECK-LABEL: @fold_memcmp_A_B_pIb(
+; CHECK-NEXT:    store i32 0, i32* [[PCMP:%.*]], align 4
+; CHECK-NEXT:    [[PST_0_0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1
+; CHECK-NEXT:    store i32 0, i32* [[PST_0_0_2]], align 4
+; CHECK-NEXT:    [[PST_0_0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2
+; CHECK-NEXT:    store i32 0, i32* [[PST_0_0_3]], align 4
+; CHECK-NEXT:    [[PST_0_0_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3
+; CHECK-NEXT:    store i32 0, i32* [[PST_0_0_4]], align 4
+; CHECK-NEXT:    [[PST_0_1_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4
+; CHECK-NEXT:    store i32 -1, i32* [[PST_0_1_1]], align 4
+; CHECK-NEXT:    [[PST_0_1_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5
+; CHECK-NEXT:    store i32 -1, i32* [[PST_0_1_2]], align 4
+; CHECK-NEXT:    [[PST_0_1_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 6
+; CHECK-NEXT:    store i32 -1, i32* [[PST_0_1_3]], align 4
+; CHECK-NEXT:    [[PST_1_0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4
+; CHECK-NEXT:    store i32 1, i32* [[PST_1_0_1]], align 4
+; CHECK-NEXT:    [[PST_1_0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5
+; CHECK-NEXT:    store i32 1, i32* [[PST_1_0_2]], align 4
+; CHECK-NEXT:    [[PST_1_0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 6
+; CHECK-NEXT:    store i32 1, i32* [[PST_1_0_3]], align 4
+; CHECK-NEXT:    ret void
+;
+  %pa = getelementptr [1 x %struct.A], [1 x %struct.A]* @a, i64 0, i64 0
+  %pb = getelementptr [1 x %struct.B], [1 x %struct.B]* @b, i64 0, i64 0
+
+  %pi8a = bitcast %struct.A* %pa to i8*
+  %pi8b = bitcast %struct.B* %pb to i8*
+
+  %pi8ap0 = getelementptr i8, i8* %pi8a, i32 0
+  %pi8bp0 = getelementptr i8, i8* %pi8b, i32 0
+
+  ; Fold memcmp(&a, &b, 1) to 0;
+  %pst_0_0_1 = getelementptr i32, i32* %pcmp, i32 0
+  %cmp_0_0_1 = call i32 @memcmp(i8* %pi8ap0, i8* %pi8bp0, i64 1)
+  store i32 %cmp_0_0_1, i32* %pst_0_0_1
+
+  ; Fold memcmp(&a, &b, 2) to 0;
+  %pst_0_0_2 = getelementptr i32, i32* %pcmp, i32 1
+  %cmp_0_0_2 = call i32 @memcmp(i8* %pi8ap0, i8* %pi8bp0, i64 2)
+  store i32 %cmp_0_0_2, i32* %pst_0_0_2
+
+  ; Fold memcmp(&a, &b, 3) to 0;
+  %pst_0_0_3 = getelementptr i32, i32* %pcmp, i32 2
+  %cmp_0_0_3 = call i32 @memcmp(i8* %pi8ap0, i8* %pi8bp0, i64 3)
+  store i32 %cmp_0_0_3, i32* %pst_0_0_3
+
+  ; Fold memcmp(&a, &b, 4) to 0;
+  %pst_0_0_4 = getelementptr i32, i32* %pcmp, i32 3
+  %cmp_0_0_4 = call i32 @memcmp(i8* %pi8ap0, i8* %pi8bp0, i64 4)
+  store i32 %cmp_0_0_4, i32* %pst_0_0_4
+
+
+  %pi8bp1 = getelementptr i8, i8* %pi8b, i32 1
+
+  ; Fold memcmp(&a, (char*)&b + 1, 1) to -1;
+  %pst_0_1_1 = getelementptr i32, i32* %pcmp, i32 4
+  %cmp_0_1_1 = call i32 @memcmp(i8* %pi8ap0, i8* %pi8bp1, i64 1)
+  store i32 %cmp_0_1_1, i32* %pst_0_1_1
+
+  ; Fold memcmp(&a, (char*)&b + 1, 2) to -1;
+  %pst_0_1_2 = getelementptr i32, i32* %pcmp, i32 5
+  %cmp_0_1_2 = call i32 @memcmp(i8* %pi8ap0, i8* %pi8bp1, i64 2)
+  store i32 %cmp_0_1_2, i32* %pst_0_1_2
+
+  ; Fold memcmp(&a, (char*)&b + 1, 3) to -1;
+  %pst_0_1_3 = getelementptr i32, i32* %pcmp, i32 6
+  %cmp_0_1_3 = call i32 @memcmp(i8* %pi8ap0, i8* %pi8bp1, i64 3)
+  store i32 %cmp_0_1_3, i32* %pst_0_1_3
+
+
+  %pi8ap1 = getelementptr i8, i8* %pi8a, i32 1
+
+  ; Fold memcmp((char*)&a + 1, &b, 1) to +1;
+  %pst_1_0_1 = getelementptr i32, i32* %pcmp, i32 4
+  %cmp_1_0_1 = call i32 @memcmp(i8* %pi8ap1, i8* %pi8bp0, i64 1)
+  store i32 %cmp_1_0_1, i32* %pst_1_0_1
+
+  ; Fold memcmp((char*)&a + 1, &b, 2) to +1;
+  %pst_1_0_2 = getelementptr i32, i32* %pcmp, i32 5
+  %cmp_1_0_2 = call i32 @memcmp(i8* %pi8ap1, i8* %pi8bp0, i64 2)
+  store i32 %cmp_1_0_2, i32* %pst_1_0_2
+
+  ; Fold memcmp((char*)&a + 1, &b, 3) to +1;
+  %pst_1_0_3 = getelementptr i32, i32* %pcmp, i32 6
+  %cmp_1_0_3 = call i32 @memcmp(i8* %pi8ap1, i8* %pi8bp0, i64 3)
+  store i32 %cmp_1_0_3, i32* %pst_1_0_3
+
+  ret void
+}

diff --git a/llvm/test/Transforms/InstCombine/memcmp-8.ll b/llvm/test/Transforms/InstCombine/memcmp-8.ll
new file mode 100644
index 0000000000000..7b7a5363e40ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memcmp-8.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that memcmp calls with past-the-end pointer arguments don't
+; cause trouble and are optimally folded.
+
+declare i32 @memcmp(i8*, i8*, i64)
+
+
+@a5 = constant [5 x i8] c"12345";
+
+
+; Fold memcmp(a5, a5 + 5, n) to 0 on the assumption that n is 0, since
+; otherwise the call would be undefined.
+
+define i32 @fold_memcmp_a5_a5p5_n(i64 %n) {
+; CHECK-LABEL: @fold_memcmp_a5_a5p5_n(
+; CHECK-NEXT:    ret i32 0
+;
+  %pa5_p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %pa5_p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %cmp = call i32 @memcmp(i8* %pa5_p0, i8* %pa5_p5, i64 %n)
+  ret i32 %cmp
+}
+
+
+; Same as above but for memcmp(a5 + 5, a5 + 5, n).
+
+define i32 @fold_memcmp_a5p5_a5p5_n(i64 %n) {
+; CHECK-LABEL: @fold_memcmp_a5p5_a5p5_n(
+; CHECK-NEXT:    ret i32 0
+;
+  %pa5_p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %qa5_p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %cmp = call i32 @memcmp(i8* %pa5_p5, i8* %qa5_p5, i64 %n)
+  ret i32 %cmp
+}
+
+
+; TODO: Likewise, fold memcmp(a5 + i, a5 + 5, n) to 0 on the same basis.
+
+define i32 @fold_memcmp_a5pi_a5p5_n(i32 %i, i64 %n) {
+; CHECK-LABEL: @fold_memcmp_a5pi_a5p5_n(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[PA5_PI:%.*]] = getelementptr [5 x i8], [5 x i8]* @a5, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[CMP:%.*]] = call i32 @memcmp(i8* [[PA5_PI]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 1, i64 0), i64 [[N:%.*]])
+; CHECK-NEXT:    ret i32 [[CMP]]
+;
+  %pa5_pi = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 %i
+  %pa5_p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %cmp = call i32 @memcmp(i8* %pa5_pi, i8* %pa5_p5, i64 %n)
+  ret i32 %cmp
+}

diff --git a/llvm/test/Transforms/InstCombine/memrchr-7.ll b/llvm/test/Transforms/InstCombine/memrchr-7.ll
new file mode 100644
index 0000000000000..e9847eed2a17d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memrchr-7.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the results of memrchr calls with past-the-end pointers used
+; in equality expressions don't cause trouble and either are folded when
+; they might be valid or not when they're provably undefined.
+
+declare i8* @memrchr(i8*, i32, i64)
+
+
+@a5 = constant [5 x i8] c"12345"
+
+
+; Fold memrchr(a5 + 5, c, 1) == a5 + 5 to an arbitrary constant.
+; The call is transformed to a5[5] == c by the memrchr simplifier, with
+; a5[5] being indeterminate.  The equality is then folded with
+; an undefined/arbitrary result.
+
+define i1 @call_memrchr_ap5_c_1_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memrchr_ap5_c_1_eq_a(
+; CHECK-NEXT:    ret i1
+;
+  %pap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %qap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 1, i32 0
+  %q = call i8* @memrchr(i8* %pap5, i32 %c, i64 1)
+  %cmp = icmp eq i8* %q, %qap5
+  ret i1 %cmp
+}
+
+
+; Fold memrchr(a5 + 5, c, 5) == a5 + 5 to an arbitrary constant.
+
+define i1 @call_memrchr_ap5_c_5_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memrchr_ap5_c_5_eq_a(
+; CHECK-NEXT:    ret i1
+;
+  %pap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %qap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 1, i32 0
+  %q = call i8* @memrchr(i8* %pap5, i32 %c, i64 5)
+  %cmp = icmp eq i8* %q, %qap5
+  ret i1 %cmp
+}
+
+
+; Fold memrchr(a5 + 5, c, n) == a5 to false.
+
+define i1 @fold_memrchr_ap5_c_n_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memrchr_ap5_c_n_eq_a(
+; CHECK-NEXT:    ret i1 false
+;
+  %pa = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %pap5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %q = call i8* @memrchr(i8* %pap5, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %pa
+  ret i1 %cmp
+}
+
+
+; Fold memrchr(a5 + 5, c, n) == null to true on the basis that n must
+; be zero in order for the call to be valid.
+
+define i1 @fold_memrchr_ap5_c_n_eqz(i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memrchr_ap5_c_n_eqz(
+; CHECK-NEXT:    ret i1 true
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %q = call i8* @memrchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, null
+  ret i1 %cmp
+}
+
+
+; Fold memrchr(a5 + 5, '\0', n) == null to true again on the basis that
+; n must be zero in order for the call to be valid.
+
+define i1 @fold_memrchr_a_nul_n_eqz(i64 %n) {
+; CHECK-LABEL: @fold_memrchr_a_nul_n_eqz(
+; CHECK-NEXT:    ret i1 true
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %q = call i8* @memrchr(i8* %p, i32 0, i64 %n)
+  %cmp = icmp eq i8* %q, null
+  ret i1 %cmp
+}

diff --git a/llvm/test/Transforms/InstCombine/str-int-3.ll b/llvm/test/Transforms/InstCombine/str-int-3.ll
index 7bce9b18c71be..916d3fed5b45a 100644
--- a/llvm/test/Transforms/InstCombine/str-int-3.ll
+++ b/llvm/test/Transforms/InstCombine/str-int-3.ll
@@ -57,25 +57,24 @@ define void @fold_atoi_member(i32* %pi) {
 }
 
 
-; Do not fold atoi with an excessive offset.  It's undefined so folding
-; it (e.g., to zero) would be valid and might prevent crashes or returning
-; a bogus value but could also prevent detecting the bug by sanitizers.
+; Fold atoi with an excessive offset.  It's undefined so folding it to zero
+; is valid and might prevent crashes or returning a bogus value, even though
+; it prevents sanitizers from detecting the bug.
 
 define void @call_atoi_offset_out_of_bounds(i32* %pi) {
 ; CHECK-LABEL: @call_atoi_offset_out_of_bounds(
-; CHECK-NEXT:    [[IA_0_0_32:%.*]] = call i32 @atoi(i8* getelementptr inbounds ([2 x %struct.A], [2 x %struct.A]* @a, i64 1, i64 0, i32 0, i64 0))
-; CHECK-NEXT:    store i32 [[IA_0_0_32]], i32* [[PI:%.*]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[PI:%.*]], align 4
 ; CHECK-NEXT:    [[IA_0_0_33:%.*]] = call i32 @atoi(i8* getelementptr ([2 x %struct.A], [2 x %struct.A]* @a, i64 1, i64 0, i32 0, i64 1))
 ; CHECK-NEXT:    store i32 [[IA_0_0_33]], i32* [[PI]], align 4
 ; CHECK-NEXT:    ret void
 ;
-; Do not fold atoi((const char*)a + sizeof a).
+; Fold atoi((const char*)a + sizeof a) to zero.
   %pa_0_0_32 = getelementptr [2 x %struct.A], [2 x %struct.A]* @a, i64 0, i64 0, i32 0, i64 32
   %ia_0_0_32 = call i32 @atoi(i8* %pa_0_0_32)
   %pia_0_0_32 = getelementptr i32, i32* %pi, i32 0
   store i32 %ia_0_0_32, i32* %pia_0_0_32
 
-; Likewise, do not fold atoi((const char*)a + sizeof a + 1).
+; TODO: Likewise, fold atoi((const char*)a + sizeof a + 1) to zero.
   %pa_0_0_33 = getelementptr [2 x %struct.A], [2 x %struct.A]* @a, i64 0, i64 0, i32 0, i64 33
   %ia_0_0_33 = call i32 @atoi(i8* %pa_0_0_33)
   %pia_0_0_33 = getelementptr i32, i32* %pi, i32 0

diff --git a/llvm/test/Transforms/InstCombine/strcall-no-nul.ll b/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
new file mode 100644
index 0000000000000..9f895437f94cc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strcall-no-nul.ll
@@ -0,0 +1,319 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that calls to [a subset of] library functions that expect
+; nul-terminated strings as arguments are folded to safe values when
+; the string arguments point just past the end of a string.  The
+; rationale is that such calls are undefined; although folding them
+; isn't important for efficiency and prevents sanitizers from
+; detecting and reporting them, sanitizers usually don't run, and
+; transforming the invalid calls into something valid is safer than
+; letting the program run off the rails.  See the Safe Optimizations
+; for Sanitizers RFC for an in-depth discussion of the trade-offs:
+; https://discourse.llvm.org/t/rfc-safe-optimizations-for-sanitizers
+
+declare i8* @strchr(i8*, i32)
+declare i8* @strrchr(i8*, i32)
+declare i32 @strcmp(i8*, i8*)
+declare i32 @strncmp(i8*, i8*, i64)
+declare i8* @strstr(i8*, i8*)
+
+declare i8* @stpcpy(i8*, i8*)
+declare i8* @strcpy(i8*, i8*)
+declare i8* @stpncpy(i8*, i8*, i64)
+declare i8* @strncpy(i8*, i8*, i64)
+
+declare i64 @strlen(i8*)
+declare i64 @strnlen(i8*, i64)
+
+declare i8* @strpbrk(i8*, i8*)
+
+declare i64 @strspn(i8*, i8*)
+declare i64 @strcspn(i8*, i8*)
+
+declare i32 @sprintf(i8*, i8*, ...)
+declare i32 @snprintf(i8*, i64, i8*, ...)
+
+
+ at a5 = constant [5 x i8] c"%s\0045";
+
+
+; Fold strchr(a5 + 5, '\0') to a5 + 5.
+
+define i8* @fold_strchr_past_end() {
+; CHECK-LABEL: @fold_strchr_past_end(
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 1, i64 0)
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %q = call i8* @strchr(i8* %p, i32 0)
+  ret i8* %q
+}
+
+; Fold strcmp(a5, a5 + 5) to 1 and strcmp(a5 + 5, a5) to -1.
+
+define void @fold_strcmp_past_end(i32* %pcmp) {
+; CHECK-LABEL: @fold_strcmp_past_end(
+; CHECK-NEXT:    store i32 1, i32* [[PCMP:%.*]], align 4
+; CHECK-NEXT:    [[PC50:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1
+; CHECK-NEXT:    store i32 -1, i32* [[PC50]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %c05 = call i32 @strcmp(i8* %p0, i8* %p5)
+  %pc05 = getelementptr i32, i32* %pcmp, i32 0
+  store i32 %c05, i32* %pc05
+
+  %c50 = call i32 @strcmp(i8* %p5, i8* %p0)
+  %pc50 = getelementptr i32, i32* %pcmp, i32 1
+  store i32 %c50, i32* %pc50
+
+  ret void
+}
+
+
+; Likewise, fold strncmp(a5, a5 + 5, 5) to 1 and strncmp(a5 + 5, a5, 5) to -1.
+
+define void @fold_strncmp_past_end(i32* %pcmp) {
+; CHECK-LABEL: @fold_strncmp_past_end(
+; CHECK-NEXT:    store i32 1, i32* [[PCMP:%.*]], align 4
+; CHECK-NEXT:    [[PC50:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1
+; CHECK-NEXT:    store i32 -1, i32* [[PC50]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %c05 = call i32 @strncmp(i8* %p0, i8* %p5, i64 5)
+  %pc05 = getelementptr i32, i32* %pcmp, i32 0
+  store i32 %c05, i32* %pc05
+
+  %c50 = call i32 @strncmp(i8* %p5, i8* %p0, i64 5)
+  %pc50 = getelementptr i32, i32* %pcmp, i32 1
+  store i32 %c50, i32* %pc50
+
+  ret void
+}
+
+
+; Fold strrchr(a5 + 5, '\0') to a5 + 5.
+
+define i8* @fold_strrchr_past_end(i32 %c) {
+; CHECK-LABEL: @fold_strrchr_past_end(
+; CHECK-NEXT:    ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 1, i64 0)
+;
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %r = call i8* @strrchr(i8* %p5, i32 0)
+  ret i8* %r
+}
+
+
+; Fold strstr(a5, a5 + 5) to a5 and strstr(a5 + 5, a5) to null.
+
+define void @fold_strstr_past_end(i8** %psub) {
+; CHECK-LABEL: @fold_strstr_past_end(
+; CHECK-NEXT:    store i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i8** [[PSUB:%.*]], align 8
+; CHECK-NEXT:    [[PS50:%.*]] = getelementptr i8*, i8** [[PSUB]], i64 1
+; CHECK-NEXT:    store i8* null, i8** [[PS50]], align 8
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %s05 = call i8* @strstr(i8* %p0, i8* %p5)
+  %ps05 = getelementptr i8*, i8** %psub, i32 0
+  store i8* %s05, i8** %ps05
+
+  %s50 = call i8* @strstr(i8* %p5, i8* %p0)
+  %ps50 = getelementptr i8*, i8** %psub, i32 1
+  store i8* %s50, i8** %ps50
+
+  ret void
+}
+
+
+; Fold strlen(a5 + 5) to 0.
+
+define i64 @fold_strlen_past_end() {
+; CHECK-LABEL: @fold_strlen_past_end(
+; CHECK-NEXT:    ret i64 0
+;
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %r = call i64 @strlen(i8* %p5)
+  ret i64 %r
+}
+
+
+; TODO: Fold stpcpy(dst, a5 + 5) to (*dst = '\0', dst).
+
+define i8* @fold_stpcpy_past_end(i8* %dst) {
+; CHECK-LABEL: @fold_stpcpy_past_end(
+; CHECK-NEXT:    ret i8* [[DST:%.*]]
+;
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %r = call i8* @strcpy(i8* %dst, i8* %p5)
+  ret i8* %r
+}
+
+
+; TODO: Fold strcpy(dst, a5 + 5) to (*dst = '\0', dst).
+
+define i8* @fold_strcpy_past_end(i8* %dst) {
+; CHECK-LABEL: @fold_strcpy_past_end(
+; CHECK-NEXT:    ret i8* [[DST:%.*]]
+;
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %r = call i8* @strcpy(i8* %dst, i8* %p5)
+  ret i8* %r
+}
+
+
+; TODO: Fold stpncpy(dst, a5 + 5, 5) to (memset(dst, 0, 5), dst + 5).
+
+define i8* @fold_stpncpy_past_end(i8* %dst) {
+; CHECK-LABEL: @fold_stpncpy_past_end(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], i8 0, i64 5, i1 false)
+; CHECK-NEXT:    ret i8* [[DST]]
+;
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %r = call i8* @strncpy(i8* %dst, i8* %p5, i64 5)
+  ret i8* %r
+}
+
+
+; TODO: Fold strncpy(dst, a5 + 5, 5) to memset(dst, 0, 5).
+
+define i8* @fold_strncpy_past_end(i8* %dst) {
+; CHECK-LABEL: @fold_strncpy_past_end(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], i8 0, i64 5, i1 false)
+; CHECK-NEXT:    ret i8* [[DST]]
+;
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %r = call i8* @strncpy(i8* %dst, i8* %p5, i64 5)
+  ret i8* %r
+}
+
+
+; Fold strpbrk(a5, a5 + 5) (and vice versa) to null.
+
+define void @fold_strpbrk_past_end(i8** %psub) {
+; CHECK-LABEL: @fold_strpbrk_past_end(
+; CHECK-NEXT:    store i8* null, i8** [[PSUB:%.*]], align 8
+; CHECK-NEXT:    [[PS50:%.*]] = getelementptr i8*, i8** [[PSUB]], i64 1
+; CHECK-NEXT:    store i8* null, i8** [[PS50]], align 8
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %s05 = call i8* @strpbrk(i8* %p0, i8* %p5)
+  %ps05 = getelementptr i8*, i8** %psub, i32 0
+  store i8* %s05, i8** %ps05
+
+  %s50 = call i8* @strpbrk(i8* %p5, i8* %p0)
+  %ps50 = getelementptr i8*, i8** %psub, i32 1
+  store i8* %s50, i8** %ps50
+
+  ret void
+}
+
+
+; Fold strspn(a5, a5 + 5) (and vice versa) to 0.
+
+define void @fold_strspn_past_end(i64* %poff) {
+; CHECK-LABEL: @fold_strspn_past_end(
+; CHECK-NEXT:    store i64 0, i64* [[POFF:%.*]], align 4
+; CHECK-NEXT:    [[PO50:%.*]] = getelementptr i64, i64* [[POFF]], i64 1
+; CHECK-NEXT:    store i64 0, i64* [[PO50]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %o05 = call i64 @strspn(i8* %p0, i8* %p5)
+  %po05 = getelementptr i64, i64* %poff, i32 0
+  store i64 %o05, i64* %po05
+
+  %o50 = call i64 @strspn(i8* %p5, i8* %p0)
+  %po50 = getelementptr i64, i64* %poff, i32 1
+  store i64 %o50, i64* %po50
+
+  ret void
+}
+
+
+; Fold strcspn(a5, a5 + 5) to 2 and strcspn(a5 + 5, a5) to 0.
+
+define void @fold_strcspn_past_end(i64* %poff) {
+; CHECK-LABEL: @fold_strcspn_past_end(
+; CHECK-NEXT:    store i64 2, i64* [[POFF:%.*]], align 4
+; CHECK-NEXT:    [[PO50:%.*]] = getelementptr i64, i64* [[POFF]], i64 1
+; CHECK-NEXT:    store i64 0, i64* [[PO50]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %o05 = call i64 @strcspn(i8* %p0, i8* %p5)
+  %po05 = getelementptr i64, i64* %poff, i32 0
+  store i64 %o05, i64* %po05
+
+  %o50 = call i64 @strcspn(i8* %p5, i8* %p0)
+  %po50 = getelementptr i64, i64* %poff, i32 1
+  store i64 %o50, i64* %po50
+
+  ret void
+}
+
+
+; Fold sprintf(dst, a5 + 5) to zero, and also
+; TODO: fold sprintf(dst, "%s", a5 + 5) to zero.
+
+define void @fold_sprintf_past_end(i32* %pcnt, i8* %dst) {
+; CHECK-LABEL: @fold_sprintf_past_end(
+; CHECK-NEXT:    store i32 0, i32* [[PCNT:%.*]], align 4
+; CHECK-NEXT:    [[PN05:%.*]] = getelementptr i32, i32* [[PCNT]], i64 1
+; CHECK-NEXT:    store i32 0, i32* [[PN05]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %n5_ = call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %p5)
+  %pn5_ = getelementptr i32, i32* %pcnt, i32 0
+  store i32 %n5_, i32* %pn5_
+
+  %n05 = call i32 (i8*, i8*, ...) @sprintf(i8* %dst, i8* %p0, i8* %p5)
+  %pn05 = getelementptr i32, i32* %pcnt, i32 1
+  store i32 %n05, i32* %pn05
+
+  ret void
+}
+
+
+; TODO: Fold snprintf(dst, n, a5 + 5) to zero, and also
+; fold snprintf(dst, n, "%s", a5 + 5) to zero.
+
+define void @fold_snprintf_past_end(i32* %pcnt, i8* %dst, i64 %n) {
+; CHECK-LABEL: @fold_snprintf_past_end(
+; CHECK-NEXT:    [[N5_:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[DST:%.*]], i64 [[N:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 1, i64 0))
+; CHECK-NEXT:    store i32 [[N5_]], i32* [[PCNT:%.*]], align 4
+; CHECK-NEXT:    [[N05:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* [[DST]], i64 [[N]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 1, i64 0))
+; CHECK-NEXT:    [[PN05:%.*]] = getelementptr i32, i32* [[PCNT]], i64 1
+; CHECK-NEXT:    store i32 [[N05]], i32* [[PN05]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+
+  %n5_ = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %dst, i64 %n, i8* %p5)
+  %pn5_ = getelementptr i32, i32* %pcnt, i32 0
+  store i32 %n5_, i32* %pn5_
+
+  %n05 = call i32 (i8*, i64, i8*, ...) @snprintf(i8* %dst, i64 %n, i8* %p0, i8* %p5)
+  %pn05 = getelementptr i32, i32* %pcnt, i32 1
+  store i32 %n05, i32* %pn05
+
+  ret void
+}

diff --git a/llvm/test/Transforms/InstCombine/strlen-9.ll b/llvm/test/Transforms/InstCombine/strlen-9.ll
new file mode 100644
index 0000000000000..bea9ec1de3826
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/strlen-9.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;
+; Verify that strlen calls with unterminated constant arrays or with
+; just past-the-end pointers to strings are folded to safe results.
+;
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+declare i64 @strlen(i8*)
+
+@a5 = constant [5 x i8] c"12345"
+@s5 = constant [6 x i8] c"12345\00"
+@z0 = constant [0 x i8] zeroinitializer
+@z5 = constant [5 x i8] zeroinitializer
+
+
+; Verify that most of the invalid calls below are folded.  This is safer than
+; making the library calls even though it prevents sanitizers from reporting
+; the bugs.
+
+define void @fold_strlen_no_nul(i64* %plen, i32 %i) {
+; CHECK-LABEL: @fold_strlen_no_nul(
+; CHECK-NEXT:    store i64 5, i64* [[PLEN:%.*]], align 4
+; CHECK-NEXT:    [[PNA5_P5:%.*]] = getelementptr i64, i64* [[PLEN]], i64 1
+; CHECK-NEXT:    store i64 0, i64* [[PNA5_P5]], align 4
+; CHECK-NEXT:    [[PNS5_P6:%.*]] = getelementptr i64, i64* [[PLEN]], i64 2
+; CHECK-NEXT:    store i64 0, i64* [[PNS5_P6]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[PA5_PI:%.*]] = getelementptr [5 x i8], [5 x i8]* @a5, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[NA5_PI:%.*]] = call i64 @strlen(i8* noundef nonnull dereferenceable(1) [[PA5_PI]])
+; CHECK-NEXT:    [[PNA5_PI:%.*]] = getelementptr i64, i64* [[PLEN]], i64 3
+; CHECK-NEXT:    store i64 [[NA5_PI]], i64* [[PNA5_PI]], align 4
+; CHECK-NEXT:    [[PNZ0_P0:%.*]] = getelementptr i64, i64* [[PLEN]], i64 4
+; CHECK-NEXT:    store i64 0, i64* [[PNZ0_P0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[PZ0_PI:%.*]] = getelementptr [0 x i8], [0 x i8]* @z0, i64 0, i64 [[TMP2]]
+; CHECK-NEXT:    [[NZ0_PI:%.*]] = call i64 @strlen(i8* noundef nonnull dereferenceable(1) [[PZ0_PI]])
+; CHECK-NEXT:    [[PNZ0_PI:%.*]] = getelementptr i64, i64* [[PLEN]], i64 5
+; CHECK-NEXT:    store i64 [[NZ0_PI]], i64* [[PNZ0_PI]], align 4
+; CHECK-NEXT:    [[PNZ5_P5:%.*]] = getelementptr i64, i64* [[PLEN]], i64 6
+; CHECK-NEXT:    store i64 0, i64* [[PNZ5_P5]], align 4
+; CHECK-NEXT:    ret void
+;
+; Verify that strlen(a5) is folded to 5.
+  %pa0_p0 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %na5_p0 = call i64 @strlen(i8* %pa0_p0)
+  %pna5_p0 = getelementptr i64, i64* %plen, i64 0
+  store i64 %na5_p0, i64* %pna5_p0
+
+; Verify that strlen(a5 + 5) is folded to 0.
+  %pa5_p5 = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 5
+  %na5_p5 = call i64 @strlen(i8* %pa5_p5)
+  %pna5_p5 = getelementptr i64, i64* %plen, i64 1
+  store i64 %na5_p5, i64* %pna5_p5
+
+; Verify that strlen(s5 + 6) is folded to 0.
+  %ps5_p6 = getelementptr [6 x i8], [6 x i8]* @s5, i32 0, i32 6
+  %ns5_p6 = call i64 @strlen(i8* %ps5_p6)
+  %pns5_p6 = getelementptr i64, i64* %plen, i64 2
+  store i64 %ns5_p6, i64* %pns5_p6
+
+; TODO: Verify that strlen(a5 + i) is folded to 5 - i?  It's currently
+; not folded because the variable offset makes getConstantDataArrayInfo
+; fail.
+  %pa5_pi = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 %i
+  %na5_pi = call i64 @strlen(i8* %pa5_pi)
+  %pna5_pi = getelementptr i64, i64* %plen, i64 3
+  store i64 %na5_pi, i64* %pna5_pi
+
+; Verify that strlen(z0) is folded to 0.
+  %pz0_p0 = getelementptr [0 x i8], [0 x i8]* @z0, i32 0, i32 0
+  %nz0_p0 = call i64 @strlen(i8* %pz0_p0)
+  %pnz0_p0 = getelementptr i64, i64* %plen, i64 4
+  store i64 %nz0_p0, i64* %pnz0_p0
+
+; TODO: Verify that strlen(z0 + i) is folded to 0.  As the case above,
+; this one is not folded either because the variable offset makes
+; getConstantDataArrayInfo fail.
+
+  %pz0_pi = getelementptr [0 x i8], [0 x i8]* @z0, i32 0, i32 %i
+  %nz0_pi = call i64 @strlen(i8* %pz0_pi)
+  %pnz0_pi = getelementptr i64, i64* %plen, i64 5
+  store i64 %nz0_pi, i64* %pnz0_pi
+
+; Verify that strlen(z5 + 5) is folded to 0.
+  %pz5_p5 = getelementptr [5 x i8], [5 x i8]* @z5, i32 0, i32 5
+  %nz5_p5 = call i64 @strlen(i8* %pz5_p5)
+  %pnz5_p5 = getelementptr i64, i64* %plen, i64 6
+  store i64 %nz5_p5, i64* %pnz5_p5
+
+  ret void
+}

diff --git a/llvm/test/Transforms/InstCombine/strnlen-1.ll b/llvm/test/Transforms/InstCombine/strnlen-1.ll
index d17c11e21964c..318ffe06414d9 100644
--- a/llvm/test/Transforms/InstCombine/strnlen-1.ll
+++ b/llvm/test/Transforms/InstCombine/strnlen-1.ll
@@ -69,9 +69,9 @@ define i64 @fold_strnlen_ax_0() {
 define i64 @fold_strnlen_ax_1() {
 ; CHECK-LABEL: @fold_strnlen_ax_1(
 ; CHECK-NEXT:    [[STRNLEN_CHAR0:%.*]] = load i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @ax, i64 0, i64 0), align 1
-; CHECK-NEXT:    [[STRNLEN_CHAR0CMP_NOT:%.*]] = icmp ne i8 [[STRNLEN_CHAR0]], 0
-; CHECK-NEXT:    [[STRNLEN_SEL:%.*]] = zext i1 [[STRNLEN_CHAR0CMP_NOT]] to i64
-; CHECK-NEXT:    ret i64 [[STRNLEN_SEL]]
+; CHECK-NEXT:    [[STRNLEN_CHAR0CMP:%.*]] = icmp ne i8 [[STRNLEN_CHAR0]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i1 [[STRNLEN_CHAR0CMP]] to i64
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %ptr = getelementptr [0 x i8], [0 x i8]* @ax, i32 0, i32 0
   %len = call i64 @strnlen(i8* %ptr, i64 1)
@@ -151,13 +151,28 @@ define i64 @fold_strnlen_s5_3_p5_5() {
 }
 
 
-; Fold strnlen(s5_3 + 6, 5) to 3.
+; Fold strnlen(s5_3 + 6, 3) to 3.
 
-define i64 @fold_strnlen_s5_3_p6_5() {
-; CHECK-LABEL: @fold_strnlen_s5_3_p6_5(
+define i64 @fold_strnlen_s5_3_p6_3() {
+; CHECK-LABEL: @fold_strnlen_s5_3_p6_3(
 ; CHECK-NEXT:    ret i64 3
 ;
   %ptr = getelementptr [9 x i8], [9 x i8]* @s5_3, i32 0, i32 6
-  %len = call i64 @strnlen(i8* %ptr, i64 5)
+  %len = call i64 @strnlen(i8* %ptr, i64 3)
+  ret i64 %len
+}
+
+
+; Fold even the invalid strnlen(s5_3 + 6, 4) call where the bound exceeds
+; the number of characters in the array.  This is arguably safer than
+; making the library call (although the low bound makes it unlikely that
+; the call would misbehave).
+
+define i64 @call_strnlen_s5_3_p6_4() {
+; CHECK-LABEL: @call_strnlen_s5_3_p6_4(
+; CHECK-NEXT:    ret i64 3
+;
+  %ptr = getelementptr [9 x i8], [9 x i8]* @s5_3, i32 0, i32 6
+  %len = call i64 @strnlen(i8* %ptr, i64 4)
   ret i64 %len
 }

diff --git a/llvm/test/Transforms/InstCombine/wcslen-1.ll b/llvm/test/Transforms/InstCombine/wcslen-1.ll
index d40e1552338ab..c7cf48fd16760 100644
--- a/llvm/test/Transforms/InstCombine/wcslen-1.ll
+++ b/llvm/test/Transforms/InstCombine/wcslen-1.ll
@@ -67,8 +67,8 @@ define i1 @test_simplify5() {
 
 define i1 @test_simplify6(i32* %str_p) {
 ; CHECK-LABEL: @test_simplify6(
-; CHECK-NEXT:    [[STRLENFIRST:%.*]] = load i32, i32* [[STR_P:%.*]], align 4
-; CHECK-NEXT:    [[EQ_NULL:%.*]] = icmp eq i32 [[STRLENFIRST]], 0
+; CHECK-NEXT:    [[CHAR0:%.*]] = load i32, i32* [[STR_P:%.*]], align 4
+; CHECK-NEXT:    [[EQ_NULL:%.*]] = icmp eq i32 [[CHAR0]], 0
 ; CHECK-NEXT:    ret i1 [[EQ_NULL]]
 ;
   %str_l = call i64 @wcslen(i32* %str_p)
@@ -90,8 +90,8 @@ define i1 @test_simplify7() {
 
 define i1 @test_simplify8(i32* %str_p) {
 ; CHECK-LABEL: @test_simplify8(
-; CHECK-NEXT:    [[STRLENFIRST:%.*]] = load i32, i32* [[STR_P:%.*]], align 4
-; CHECK-NEXT:    [[NE_NULL:%.*]] = icmp ne i32 [[STRLENFIRST]], 0
+; CHECK-NEXT:    [[CHAR0:%.*]] = load i32, i32* [[STR_P:%.*]], align 4
+; CHECK-NEXT:    [[NE_NULL:%.*]] = icmp ne i32 [[CHAR0]], 0
 ; CHECK-NEXT:    ret i1 [[NE_NULL]]
 ;
   %str_l = call i64 @wcslen(i32* %str_p)
@@ -210,10 +210,13 @@ define i64 @test_no_simplify3_no_null_opt(i32 %x) #0 {
 
 @str16 = constant [1 x i16] [i16 0]
 
-define i64 @test_no_simplify4() {
-; CHECK-LABEL: @test_no_simplify4(
-; CHECK-NEXT:    [[L:%.*]] = call i64 @wcslen(i32* bitcast ([1 x i16]* @str16 to i32*))
-; CHECK-NEXT:    ret i64 [[L]]
+; Fold the invalid call to zero.  This is safer than letting the undefined
+; library call take place even though it prevents sanitizers from detecting
+; it.
+
+define i64 @test_simplify12() {
+; CHECK-LABEL: @test_simplify12(
+; CHECK-NEXT:    ret i64 0
 ;
   %l = call i64 @wcslen(i32* bitcast ([1 x i16]* @str16 to i32*))
   ret i64 %l


        


More information about the llvm-commits mailing list