[llvm] efce19c - Revert "[loop-idiom] Hoist loop memcpys to loop preheader"

Tres Popp via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 28 04:16:49 PDT 2021


Author: Tres Popp
Date: 2021-04-28T13:16:34+02:00
New Revision: efce19c3b0920944e2223a8dbc363432fe39e077

URL: https://github.com/llvm/llvm-project/commit/efce19c3b0920944e2223a8dbc363432fe39e077
DIFF: https://github.com/llvm/llvm-project/commit/efce19c3b0920944e2223a8dbc363432fe39e077.diff

LOG: Revert "[loop-idiom] Hoist loop memcpys to loop preheader"

This reverts commit 75d6b8bb4056d518d06b72e6411ce3749455e2e3.

The reasoning is mentioned in https://reviews.llvm.org/D97667

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll
    llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll

Removed: 
    llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll
    llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 46077b04cff4b..c9bce17fcde68 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -205,13 +205,6 @@ class LoopIdiomRecognize {
   enum class ForMemset { No, Yes };
   bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
                          ForMemset For);
-
-  template <typename MemInst>
-  bool processLoopMemIntrinsic(
-      BasicBlock *BB,
-      bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
-      const SCEV *BECount);
-  bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
 
   bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
@@ -642,10 +635,22 @@ bool LoopIdiomRecognize::runOnLoopBlock(
   for (auto &SI : StoreRefsForMemcpy)
     MadeChange |= processLoopStoreOfLoopLoad(SI, BECount);
 
-  MadeChange |= processLoopMemIntrinsic<MemCpyInst>(
-      BB, &LoopIdiomRecognize::processLoopMemCpy, BECount);
-  MadeChange |= processLoopMemIntrinsic<MemSetInst>(
-      BB, &LoopIdiomRecognize::processLoopMemSet, BECount);
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+    Instruction *Inst = &*I++;
+    // Look for memset instructions, which may be optimized to a larger memset.
+    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
+      WeakTrackingVH InstPtr(&*I);
+      if (!processLoopMemSet(MSI, BECount))
+        continue;
+      MadeChange = true;
+
+      // If processing the memset invalidated our iterator, start over from the
+      // top of the block.
+      if (!InstPtr)
+        I = BB->begin();
+      continue;
+    }
+  }
 
   return MadeChange;
 }
@@ -794,86 +799,6 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
   return Changed;
 }
 
-/// processLoopMemIntrinsic - Template function for calling different processor
-/// functions based on mem instrinsic type.
-template <typename MemInst>
-bool LoopIdiomRecognize::processLoopMemIntrinsic(
-    BasicBlock *BB,
-    bool (LoopIdiomRecognize::*Processor)(MemInst *, const SCEV *),
-    const SCEV *BECount) {
-  bool MadeChange = false;
-  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
-    Instruction *Inst = &*I++;
-    // Look for memory instructions, which may be optimized to a larger one.
-    if (MemInst *MI = dyn_cast<MemInst>(Inst)) {
-      WeakTrackingVH InstPtr(&*I);
-      if (!(this->*Processor)(MI, BECount))
-        continue;
-      MadeChange = true;
-
-      // If processing the instruction invalidated our iterator, start over from
-      // the top of the block.
-      if (!InstPtr)
-        I = BB->begin();
-    }
-  }
-  return MadeChange;
-}
-
-/// processLoopMemCpy - See if this memcpy can be promoted to a large memcpy
-bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
-                                           const SCEV *BECount) {
-  // We can only handle non-volatile memcpys with a constant size.
-  if (MCI->isVolatile() || !isa<ConstantInt>(MCI->getLength()))
-    return false;
-
-  // If we're not allowed to hack on memcpy, we fail.
-  if (!HasMemcpy || DisableLIRP::Memcpy)
-    return false;
-
-  Value *Dest = MCI->getDest();
-  Value *Source = MCI->getSource();
-  if (!Dest || !Source)
-    return false;
-
-  // See if the load and store pointer expressions are AddRec like {base,+,1} on
-  // the current loop, which indicates a strided load and store.  If we have
-  // something else, it's a random load or store we can't handle.
-  const SCEVAddRecExpr *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Dest));
-  if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
-    return false;
-  const SCEVAddRecExpr *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Source));
-  if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
-    return false;
-
-  // Reject memcpys that are so large that they overflow an unsigned.
-  uint64_t SizeInBytes = cast<ConstantInt>(MCI->getLength())->getZExtValue();
-  if ((SizeInBytes >> 32) != 0)
-    return false;
-
-  // Check if the stride matches the size of the memcpy. If so, then we know
-  // that every byte is touched in the loop.
-  const SCEVConstant *StrStride =
-      dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
-  const SCEVConstant *LoadStride =
-      dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
-  if (!StrStride || !LoadStride)
-    return false;
-
-  APInt StrIntStride = StrStride->getAPInt();
-  APInt LoadIntStride = LoadStride->getAPInt();
-  if (SizeInBytes != StrIntStride && SizeInBytes != -StrIntStride)
-    return false;
-
-  // Check if the load stride matches the store stride.
-  if (StrIntStride != LoadIntStride && StrIntStride != -LoadIntStride)
-    return false;
-
-  return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes,
-                                    MCI->getDestAlign(), MCI->getSourceAlign(),
-                                    MCI, MCI, StoreEv, LoadEv, BECount);
-}
-
 /// processLoopMemSet - See if this memset can be promoted to a large memset.
 bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
                                            const SCEV *BECount) {
@@ -882,7 +807,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
     return false;
 
   // If we're not allowed to hack on memset, we fail.
-  if (!HasMemset || DisableLIRP::Memset)
+  if (!HasMemset)
     return false;
 
   Value *Pointer = MSI->getDest();
@@ -1122,11 +1047,9 @@ bool LoopIdiomRecognize::processLoopStridedStore(
   ORE.emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
                               NewCall->getDebugLoc(), Preheader)
-           << "Transformed loop-strided store in "
-           << ore::NV("Function", TheStore->getFunction())
-           << " function into a call to "
+           << "Transformed loop-strided store into a call to "
            << ore::NV("NewFunction", NewCall->getCalledFunction())
-           << "() intrinsic";
+           << "() function";
   });
 
   // Okay, the memset has been formed.  Zap the original store and anything that
@@ -1214,22 +1137,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
 
   SmallPtrSet<Instruction *, 1> Stores;
   Stores.insert(TheStore);
-
-  bool IsMemCpy = isa<MemCpyInst>(TheStore);
-  const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
-
   if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
-                            StoreSize, *AA, Stores)) {
-    ORE.emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
-                                      TheStore)
-             << ore::NV("Inst", InstRemark) << " in "
-             << ore::NV("Function", TheStore->getFunction())
-             << " function will not be hoisted: "
-             << ore::NV("Reason", "The loop may access store location");
-    });
+                            StoreSize, *AA, Stores))
     return Changed;
-  }
 
   const SCEV *LdStart = LoadEv->getStart();
   unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
@@ -1243,21 +1153,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
   Value *LoadBasePtr = Expander.expandCodeFor(
       LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
 
-  // If the store is a memcpy instruction, we must check if it will write to
-  // the load memory locations. So remove it from the ignored stores.
-  if (IsMemCpy)
-    Stores.erase(TheStore);
   if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
-                            StoreSize, *AA, Stores)) {
-    ORE.emit([&]() {
-      return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
-             << ore::NV("Inst", InstRemark) << " in "
-             << ore::NV("Function", TheStore->getFunction())
-             << " function will not be hoisted: "
-             << ore::NV("Reason", "The loop may access load location");
-    });
+                            StoreSize, *AA, Stores))
     return Changed;
-  }
 
   if (avoidLIRForMultiBlockLoop())
     return Changed;
@@ -1318,9 +1216,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
                               NewCall->getDebugLoc(), Preheader)
            << "Formed a call to "
            << ore::NV("NewFunction", NewCall->getCalledFunction())
-           << "() intrinsic from " << ore::NV("Inst", InstRemark)
-           << " instruction in " << ore::NV("Function", TheStore->getFunction())
-           << " function";
+           << "() function";
   });
 
   // Okay, the memcpy has been formed.  Zap the original store and anything that

diff  --git a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll
index 6f817f2b56d83..3578540cc4d2d 100644
--- a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll
+++ b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; Check that everything still works when debuginfo is present, and that it is reasonably propagated.
 
-; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() intrinsic from load and store instruction in test6_dest_align function
+; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() function
 
 define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
 ; CHECK-LABEL: @test6_dest_align(

diff  --git a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll
deleted file mode 100644
index 20def758e6316..0000000000000
--- a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll
+++ /dev/null
@@ -1,89 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-idiom < %s -S | FileCheck %s
-
-; #include <vector>
-;
-; class SDValue {
-;     int A;
-;     int B;
-;     unsigned C;
-; };
-;
-; class SDUse {
-;     SDValue Val;
-;     SDUse **Prev = nullptr;
-;     SDUse *Next = nullptr;
-;
-; public:
-;     operator const SDValue&() const { return Val; }
-; };
-;
-; void foo(SDUse *S, int N) {
-;     // Should not hoist memcpy because source and destination are of different types
-;     std::vector<SDValue> Ops(S, S + N);
-; }
-
-; ModuleID = 'different_types.cpp'
-source_filename = "different_types.cpp"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%class.SDUse = type { %class.SDValue, %class.SDUse**, %class.SDUse* }
-%class.SDValue = type { i32, i32, i32 }
-
-declare dso_local i32 @__gxx_personality_v0(...)
-
-; Function Attrs: uwtable mustprogress
-define linkonce_odr dso_local %class.SDValue* @_ZNSt20__uninitialized_copyILb0EE13__uninit_copyIP5SDUseP7SDValueEET0_T_S7_S6_(%class.SDUse* %__first, %class.SDUse* %__last, %class.SDValue* %__result) local_unnamed_addr #0  align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; CHECK-LABEL: @_ZNSt20__uninitialized_copyILb0EE13__uninit_copyIP5SDUseP7SDValueEET0_T_S7_S6_(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP_NOT15:%.*]] = icmp eq %class.SDUse* [[__FIRST:%.*]], [[__LAST:%.*]]
-; CHECK-NEXT:    br i1 [[CMP_NOT15]], label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]]
-; CHECK:       for.inc.preheader:
-; CHECK-NEXT:    br label [[FOR_INC:%.*]]
-; CHECK:       for.inc:
-; CHECK-NEXT:    [[__CUR_017:%.*]] = phi %class.SDValue* [ [[INCDEC_PTR1:%.*]], [[FOR_INC]] ], [ [[__RESULT:%.*]], [[FOR_INC_PREHEADER]] ]
-; CHECK-NEXT:    [[__FIRST_ADDR_016:%.*]] = phi %class.SDUse* [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ], [ [[__FIRST]], [[FOR_INC_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %class.SDValue* [[__CUR_017]] to i8*
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %class.SDUse* [[__FIRST_ADDR_016]] to i8*
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(12) [[TMP0]], i8* noundef nonnull align 8 dereferenceable(12) [[TMP1]], i64 12, i1 false)
-; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds [[CLASS_SDUSE:%.*]], %class.SDUse* [[__FIRST_ADDR_016]], i64 1
-; CHECK-NEXT:    [[INCDEC_PTR1]] = getelementptr inbounds [[CLASS_SDVALUE:%.*]], %class.SDValue* [[__CUR_017]], i64 1
-; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq %class.SDUse* [[INCDEC_PTR]], [[__LAST]]
-; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_INC]]
-; CHECK:       for.end.loopexit:
-; CHECK-NEXT:    [[INCDEC_PTR1_LCSSA:%.*]] = phi %class.SDValue* [ [[INCDEC_PTR1]], [[FOR_INC]] ]
-; CHECK-NEXT:    br label [[FOR_END]]
-; CHECK:       for.end:
-; CHECK-NEXT:    [[__CUR_0_LCSSA:%.*]] = phi %class.SDValue* [ [[__RESULT]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1_LCSSA]], [[FOR_END_LOOPEXIT]] ]
-; CHECK-NEXT:    ret %class.SDValue* [[__CUR_0_LCSSA]]
-;
-entry:
-  %cmp.not15 = icmp eq %class.SDUse* %__first, %__last
-  br i1 %cmp.not15, label %for.end, label %for.inc.preheader
-
-for.inc.preheader:                                ; preds = %entry
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.inc.preheader, %for.inc
-  %__cur.017 = phi %class.SDValue* [ %incdec.ptr1, %for.inc ], [ %__result, %for.inc.preheader ]
-  %__first.addr.016 = phi %class.SDUse* [ %incdec.ptr, %for.inc ], [ %__first, %for.inc.preheader ]
-  %0 = bitcast %class.SDValue* %__cur.017 to i8*
-  %1 = bitcast %class.SDUse* %__first.addr.016 to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(12) %0, i8* noundef nonnull align 8 dereferenceable(12) %1, i64 12, i1 false)
-  %incdec.ptr = getelementptr inbounds %class.SDUse, %class.SDUse* %__first.addr.016, i64 1
-  %incdec.ptr1 = getelementptr inbounds %class.SDValue, %class.SDValue* %__cur.017, i64 1
-  %cmp.not = icmp eq %class.SDUse* %incdec.ptr, %__last
-  br i1 %cmp.not, label %for.end.loopexit, label %for.inc
-
-for.end.loopexit:                                 ; preds = %for.inc
-  %incdec.ptr1.lcssa = phi %class.SDValue* [ %incdec.ptr1, %for.inc ]
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  %__cur.0.lcssa = phi %class.SDValue* [ %__result, %entry ], [ %incdec.ptr1.lcssa, %for.end.loopexit ]
-  ret %class.SDValue* %__cur.0.lcssa
-}
-
-; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1

diff  --git a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll
deleted file mode 100644
index bb0d68ed20424..0000000000000
--- a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll
+++ /dev/null
@@ -1,309 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -loop-idiom < %s -S | FileCheck %s
-
-%struct.S = type { i32, i32, i8 }
-
-; unsigned copy_noalias(S* __restrict a, S *b, int n) {
-;   for (int i = 0; i < n; i++) {
-;     a[i] = b[i];
-;   }
-;   return sizeof(a[0]);
-; }
-
-; Function Attrs: nofree nounwind uwtable mustprogress
-define dso_local i32 @copy_noalias(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr #0 {
-; CHECK-LABEL: @copy_noalias(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A1:%.*]] = bitcast %struct.S* [[A:%.*]] to i8*
-; CHECK-NEXT:    [[B2:%.*]] = bitcast %struct.S* [[B:%.*]] to i8*
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[A1]], i8* align 4 [[B2]], i64 [[TMP1]], i1 false)
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret i32 12
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[B]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[A]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8*
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast %struct.S* [[ARRAYIDX]] to i8*
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
-;
-entry:
-  %cmp7 = icmp sgt i32 %n, 0
-  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.cond.cleanup.loopexit:                        ; preds = %for.body
-  br label %for.cond.cleanup
-
-for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
-  ret i32 12
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %idxprom = zext i32 %i.08 to i64
-  %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom
-  %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom
-  %0 = bitcast %struct.S* %arrayidx2 to i8*
-  %1 = bitcast %struct.S* %arrayidx to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false)
-  %inc = add nuw nsw i32 %i.08, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
-}
-
-; unsigned copy_may_alias(S *a, S *b, int n) {
-;   for (int i = 0; i < n; i++) {
-;     a[i] = b[i];
-;   }
-;   return sizeof(a[0]);
-; }
-
-; Function Attrs: nofree nounwind uwtable mustprogress
-define dso_local i32 @copy_may_alias(%struct.S* nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr #0 {
-; CHECK-LABEL: @copy_may_alias(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.preheader:
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret i32 12
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[B:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[A:%.*]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8*
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast %struct.S* [[ARRAYIDX]] to i8*
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) [[TMP0]], i8* nonnull align 4 dereferenceable(12) [[TMP1]], i64 12, i1 false)
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
-;
-entry:
-  %cmp7 = icmp sgt i32 %n, 0
-  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.cond.cleanup.loopexit:                        ; preds = %for.body
-  br label %for.cond.cleanup
-
-for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
-  ret i32 12
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %idxprom = zext i32 %i.08 to i64
-  %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom
-  %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom
-  %0 = bitcast %struct.S* %arrayidx2 to i8*
-  %1 = bitcast %struct.S* %arrayidx to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false)
-  %inc = add nuw nsw i32 %i.08, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
-}
-
-%struct.R = type <{ i8, i32, i8 }>
-
-; void copy_noalias_read(S* __restrict x, S* __restrict y, int n, int &s) {
-;   for (int i = 0; i < n; i++) {
-;     x[i] = y[i];
-;     s += y[i].b;
-;   }
-; }
-
-; Function Attrs: nofree nounwind uwtable mustprogress
-define dso_local void @copy_noalias_read(%struct.R* noalias nocapture %x, %struct.R* noalias nocapture readonly %y, i32 %n, i32* nocapture nonnull align 4 dereferenceable(4) %s) local_unnamed_addr #0 {
-; CHECK-LABEL: @copy_noalias_read(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X1:%.*]] = bitcast %struct.R* [[X:%.*]] to i8*
-; CHECK-NEXT:    [[Y2:%.*]] = bitcast %struct.R* [[Y:%.*]] to i8*
-; CHECK-NEXT:    [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    [[S_PROMOTED:%.*]] = load i32, i32* [[S:%.*]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i64 [[TMP0]], 6
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[X1]], i8* align 1 [[Y2]], i64 [[TMP1]], i1 false)
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.for.cond.cleanup_crit_edge:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA]], i32* [[S]], align 4
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret void
-; CHECK:       for.body:
-; CHECK-NEXT:    [[ADD13:%.*]] = phi i32 [ [[S_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[I_012:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_012]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_R:%.*]], %struct.R* [[X]], i64 [[IDXPROM]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_R]], %struct.R* [[Y]], i64 [[IDXPROM]], i32 0
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [[STRUCT_R]], %struct.R* [[Y]], i64 [[IDXPROM]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[B]], align 1
-; CHECK-NEXT:    [[ADD]] = add nsw i32 [[ADD13]], [[TMP4]]
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_012]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]]
-;
-entry:
-  %cmp11 = icmp sgt i32 %n, 0
-  br i1 %cmp11, label %for.body.lr.ph, label %for.cond.cleanup
-
-for.body.lr.ph:                                   ; preds = %entry
-  %s.promoted = load i32, i32* %s, align 4
-  br label %for.body
-
-for.cond.for.cond.cleanup_crit_edge:              ; preds = %for.body
-  %add.lcssa = phi i32 [ %add, %for.body ]
-  store i32 %add.lcssa, i32* %s, align 4
-  br label %for.cond.cleanup
-
-for.cond.cleanup:                                 ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
-  ret void
-
-for.body:                                         ; preds = %for.body.lr.ph, %for.body
-  %add13 = phi i32 [ %s.promoted, %for.body.lr.ph ], [ %add, %for.body ]
-  %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
-  %idxprom = zext i32 %i.012 to i64
-  %0 = getelementptr inbounds %struct.R, %struct.R* %x, i64 %idxprom, i32 0
-  %1 = getelementptr inbounds %struct.R, %struct.R* %y, i64 %idxprom, i32 0
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(6) %0, i8* nonnull align 1 dereferenceable(6) %1, i64 6, i1 false)
-  %b = getelementptr inbounds %struct.R, %struct.R* %y, i64 %idxprom, i32 1
-  %2 = load i32, i32* %b, align 1
-  %add = add nsw i32 %add13, %2
-  %inc = add nuw nsw i32 %i.012, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
-}
-
-%struct.SPacked = type <{ i32, i32, i8 }>
-
-; Function Attrs: nofree nounwind uwtable mustprogress
-define dso_local i32 @copy_noalias_packed(%struct.SPacked* noalias nocapture %a, %struct.SPacked* nocapture readonly %b, i32 %n) local_unnamed_addr #0 {
-; CHECK-LABEL: @copy_noalias_packed(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A1:%.*]] = bitcast %struct.SPacked* [[A:%.*]] to i8*
-; CHECK-NEXT:    [[B2:%.*]] = bitcast %struct.SPacked* [[B:%.*]] to i8*
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i64 [[TMP0]], 9
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[A1]], i8* align 1 [[B2]], i64 [[TMP1]], i1 false)
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret i32 9
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SPACKED:%.*]], %struct.SPacked* [[B]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_SPACKED]], %struct.SPacked* [[A]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %struct.SPacked* [[ARRAYIDX2]] to i8*
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast %struct.SPacked* [[ARRAYIDX]] to i8*
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
-;
-entry:
-  %cmp7 = icmp sgt i32 %n, 0
-  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.cond.cleanup.loopexit:                        ; preds = %for.body
-  br label %for.cond.cleanup
-
-for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
-  ret i32 9
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %idxprom = zext i32 %i.08 to i64
-  %arrayidx = getelementptr inbounds %struct.SPacked, %struct.SPacked* %b, i64 %idxprom
-  %arrayidx2 = getelementptr inbounds %struct.SPacked, %struct.SPacked* %a, i64 %idxprom
-  %0 = bitcast %struct.SPacked* %arrayidx2 to i8*
-  %1 = bitcast %struct.SPacked* %arrayidx to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(9) %0, i8* nonnull align 1 dereferenceable(9) %1, i64 9, i1 false)
-  %inc = add nuw nsw i32 %i.08, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
-}
-
-%struct.SAligned = type { i32, i32, i8, [7 x i8] }
-
-define dso_local i32 @copy_noalias_aligned(%struct.SAligned* noalias nocapture %a, %struct.SAligned* nocapture readonly %b, i32 %n) local_unnamed_addr #0 {
-; CHECK-LABEL: @copy_noalias_aligned(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A1:%.*]] = bitcast %struct.SAligned* [[A:%.*]] to i8*
-; CHECK-NEXT:    [[B2:%.*]] = bitcast %struct.SAligned* [[B:%.*]] to i8*
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0
-; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A1]], i8* align 16 [[B2]], i64 [[TMP1]], i1 false)
-; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
-; CHECK:       for.cond.cleanup.loopexit:
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    ret i32 16
-; CHECK:       for.body:
-; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[I_08]] to i64
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_SALIGNED:%.*]], %struct.SAligned* [[B]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_SALIGNED]], %struct.SAligned* [[A]], i64 [[IDXPROM]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast %struct.SAligned* [[ARRAYIDX2]] to i8*
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast %struct.SAligned* [[ARRAYIDX]] to i8*
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]]
-;
-entry:
-  %cmp7 = icmp sgt i32 %n, 0
-  br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
-
-for.body.preheader:                               ; preds = %entry
-  br label %for.body
-
-for.cond.cleanup.loopexit:                        ; preds = %for.body
-  br label %for.cond.cleanup
-
-for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
-  ret i32 16
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %idxprom = zext i32 %i.08 to i64
-  %arrayidx = getelementptr inbounds %struct.SAligned, %struct.SAligned* %b, i64 %idxprom
-  %arrayidx2 = getelementptr inbounds %struct.SAligned, %struct.SAligned* %a, i64 %idxprom
-  %0 = bitcast %struct.SAligned* %arrayidx2 to i8*
-  %1 = bitcast %struct.SAligned* %arrayidx to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 16 dereferenceable(16) %0, i8* nonnull align 16 dereferenceable(16) %1, i64 16, i1 false)
-  %inc = add nuw nsw i32 %i.08, 1
-  %cmp = icmp slt i32 %inc, %n
-  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
-}
-
-; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1

diff  --git a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll
index b7a866f446c74..06e17fecec6da 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ;     *begin = value;
 ; }
 
-; CHECK: remark: <stdin>:4:1: Transformed loop-strided store in _Z15my_basic_memsetPcS_c function into a call to llvm.memset.p0i8.i64() intrinsic
+; CHECK: remark: <stdin>:4:1: Transformed loop-strided store into a call to llvm.memset.p0i8.i64() function
 
 define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) {
 ; CHECK-LABEL: @_Z15my_basic_memsetPcS_c(


        


More information about the llvm-commits mailing list