[llvm] 3877039 - [LoopIdiom] Select llvm.experimental.memset.pattern intrinsic rather than memset_pattern16 libcall (#126736)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 05:48:19 PDT 2025


Author: Alex Bradbury
Date: 2025-07-09T13:48:15+01:00
New Revision: 3877039fd1d09f87f13fdf64c544eafcfc09c650

URL: https://github.com/llvm/llvm-project/commit/3877039fd1d09f87f13fdf64c544eafcfc09c650
DIFF: https://github.com/llvm/llvm-project/commit/3877039fd1d09f87f13fdf64c544eafcfc09c650.diff

LOG: [LoopIdiom] Select llvm.experimental.memset.pattern intrinsic rather than memset_pattern16 libcall (#126736)

In order to keep the change as incremental as possible, this only
introduces the memset.pattern intrinsic in cases where memset_pattern16
would have been used. Future patches can enable it on targets that don't
have the libcall, and select it in cases where the libcall isn't
directly usable. As the memset.pattern intrinsic takes the number of
times to store the pattern as an argument, unlike memset_pattern16 which
takes the number of bytes to write, we no longer try to form an i128
pattern.

Special care is taken for cases where multiple stores in the same loop
iteration were combined to form a single pattern. For such cases, we
inherit the existing limitation that only stores of the same value are
combined, so loops such as the following are supported:

```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 2;
}
```

But the following doesn't result in a memset.pattern (even though it
could, by forming an appropriate pattern):
```
for (unsigned i = 0; i < 2 * n; i += 2) {
  f[i] = 2;
  f[i+1] = 3;
}
```

Addressing this existing deficiency is left for a follow-up due to a
desire not to change too much at once (i.e. to target equivalence to the
current codegen).

A command line option is introduced to force the selection of the
intrinsic even in cases where it otherwise wouldn't be selected (i.e. in
cases where the libcall wouldn't have been selected). This is intended as
a transitionary option for testing and experimentation, to be removed at
a later point.

The only platforms this should impact are those that have the memset_pattern16 libcall (Apple platforms). Testing performed to check for no unexpected codegen changes is described here https://github.com/llvm/llvm-project/pull/126736#issuecomment-3005097468

Added: 
    llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
    llvm/test/Transforms/LoopIdiom/basic.ll
    llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
    llvm/test/Transforms/LoopIdiom/struct_pattern.ll
    llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll
    llvm/test/Transforms/LoopIdiom/unroll.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 0967e90e24c5f..8c84b0d1e2b3e 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -150,6 +150,11 @@ static cl::opt<bool> UseLIRCodeSizeHeurs(
              "with -Os/-Oz"),
     cl::init(true), cl::Hidden);
 
+static cl::opt<bool> ForceMemsetPatternIntrinsic(
+    "loop-idiom-force-memset-pattern-intrinsic",
+    cl::desc("Use memset.pattern intrinsic whenever possible"), cl::init(false),
+    cl::Hidden);
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -323,10 +328,15 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
       L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;
 
   HasMemset = TLI->has(LibFunc_memset);
+  // TODO: Unconditionally enable use of the memset pattern intrinsic (or at
+  // least, opt-in via target hook) once we are confident it will never result
+  // in worse codegen than without. For now, use it only when the target
+  // supports memset_pattern16 libcall (or unless this is overridden by
+  // command line option).
   HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
   HasMemcpy = TLI->has(LibFunc_memcpy);
 
-  if (HasMemset || HasMemsetPattern || HasMemcpy)
+  if (HasMemset || HasMemsetPattern || ForceMemsetPatternIntrinsic || HasMemcpy)
     if (SE->hasLoopInvariantBackedgeTakenCount(L))
       return runOnCountableLoop();
 
@@ -378,11 +388,13 @@ static APInt getStoreStride(const SCEVAddRecExpr *StoreEv) {
 }
 
 /// getMemSetPatternValue - If a strided store of the specified value is safe to
-/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should
-/// be passed in.  Otherwise, return null.
+/// turn into a memset.pattern intrinsic, return the Constant that should
+/// be passed in. Otherwise, return null.
 ///
-/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
-/// just replicate their input array and then pass on to memset_pattern16.
+/// TODO this function could allow more constants than it does today (e.g.
+/// those over 16 bytes) now it has transitioned to being used for the
+/// memset.pattern intrinsic rather than directly the memset_pattern16
+/// libcall.
 static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
   // FIXME: This could check for UndefValue because it can be merged into any
   // other valid pattern.
@@ -411,14 +423,12 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
   if (Size > 16)
     return nullptr;
 
-  // If the constant is exactly 16 bytes, just use it.
-  if (Size == 16)
-    return C;
+  // For now, don't handle types that aren't int, floats, or pointers.
+  Type *CTy = C->getType();
+  if (!CTy->isIntOrPtrTy() && !CTy->isFloatingPointTy())
+    return nullptr;
 
-  // Otherwise, we'll use an array of the constants.
-  unsigned ArraySize = 16 / Size;
-  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
-  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
+  return C;
 }
 
 LoopIdiomRecognize::LegalStoreKind
@@ -479,7 +489,8 @@ LoopIdiomRecognize::isLegalStore(StoreInst *SI) {
     // It looks like we can use SplatValue.
     return LegalStoreKind::Memset;
   }
-  if (!UnorderedAtomic && HasMemsetPattern && !DisableLIRP::Memset &&
+  if (!UnorderedAtomic && (HasMemsetPattern || ForceMemsetPatternIntrinsic) &&
+      !DisableLIRP::Memset &&
       // Don't create memset_pattern16s with address spaces.
       StorePtr->getType()->getPointerAddressSpace() == 0 &&
       getMemSetPatternValue(StoredVal, DL)) {
@@ -1061,50 +1072,81 @@ bool LoopIdiomRecognize::processLoopStridedStore(
     return Changed;
 
   // Okay, everything looks good, insert the memset.
+  Value *SplatValue = isBytewiseValue(StoredVal, *DL);
+  Constant *PatternValue = nullptr;
+  if (!SplatValue)
+    PatternValue = getMemSetPatternValue(StoredVal, DL);
+
+  // MemsetArg is the number of bytes for the memset libcall, and the number
+  // of pattern repetitions if the memset.pattern intrinsic is being used.
+  Value *MemsetArg;
+  std::optional<int64_t> BytesWritten;
+
+  if (PatternValue && (HasMemsetPattern || ForceMemsetPatternIntrinsic)) {
+    const SCEV *TripCountS =
+        SE->getTripCountFromExitCount(BECount, IntIdxTy, CurLoop);
+    if (!Expander.isSafeToExpand(TripCountS))
+      return Changed;
+    const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+    if (!ConstStoreSize)
+      return Changed;
+    Value *TripCount = Expander.expandCodeFor(TripCountS, IntIdxTy,
+                                              Preheader->getTerminator());
+    uint64_t PatternRepsPerTrip =
+        (ConstStoreSize->getValue()->getZExtValue() * 8) /
+        DL->getTypeSizeInBits(PatternValue->getType());
+    // If ConstStoreSize is not equal to the width of PatternValue, then
+    // MemsetArg is TripCount * (ConstStoreSize/PatternValueWidth). Else
+    // MemsetArg is just TripCount.
+    MemsetArg =
+        PatternRepsPerTrip == 1
+            ? TripCount
+            : Builder.CreateMul(TripCount,
+                                Builder.getIntN(IntIdxTy->getIntegerBitWidth(),
+                                                PatternRepsPerTrip));
+    if (auto *CI = dyn_cast<ConstantInt>(TripCount))
+      BytesWritten =
+          CI->getZExtValue() * ConstStoreSize->getValue()->getZExtValue();
 
-  const SCEV *NumBytesS =
-      getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
-
-  // TODO: ideally we should still be able to generate memset if SCEV expander
-  // is taught to generate the dependencies at the latest point.
-  if (!Expander.isSafeToExpand(NumBytesS))
-    return Changed;
+  } else {
+    const SCEV *NumBytesS =
+        getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
 
-  Value *NumBytes =
-      Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+    // TODO: ideally we should still be able to generate memset if SCEV expander
+    // is taught to generate the dependencies at the latest point.
+    if (!Expander.isSafeToExpand(NumBytesS))
+      return Changed;
+    MemsetArg =
+        Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+    if (auto *CI = dyn_cast<ConstantInt>(MemsetArg))
+      BytesWritten = CI->getZExtValue();
+  }
+  assert(MemsetArg && "MemsetArg should have been set");
 
   AAMDNodes AATags = TheStore->getAAMetadata();
   for (Instruction *Store : Stores)
     AATags = AATags.merge(Store->getAAMetadata());
-  if (auto CI = dyn_cast<ConstantInt>(NumBytes))
-    AATags = AATags.extendTo(CI->getZExtValue());
+  if (BytesWritten)
+    AATags = AATags.extendTo(BytesWritten.value());
   else
     AATags = AATags.extendTo(-1);
 
   CallInst *NewCall;
-  if (Value *SplatValue = isBytewiseValue(StoredVal, *DL)) {
-    NewCall = Builder.CreateMemSet(BasePtr, SplatValue, NumBytes,
+  if (SplatValue) {
+    NewCall = Builder.CreateMemSet(BasePtr, SplatValue, MemsetArg,
                                    MaybeAlign(StoreAlignment),
                                    /*isVolatile=*/false, AATags);
-  } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
-    // Everything is emitted in default address space
-    Type *Int8PtrTy = DestInt8PtrTy;
-
-    StringRef FuncName = "memset_pattern16";
-    FunctionCallee MSP = getOrInsertLibFunc(M, *TLI, LibFunc_memset_pattern16,
-                            Builder.getVoidTy(), Int8PtrTy, Int8PtrTy, IntIdxTy);
-    inferNonMandatoryLibFuncAttrs(M, FuncName, *TLI);
-
-    // Otherwise we should form a memset_pattern16.  PatternValue is known to be
-    // an constant array of 16-bytes.  Plop the value into a mergable global.
-    Constant *PatternValue = getMemSetPatternValue(StoredVal, DL);
-    assert(PatternValue && "Expected pattern value.");
-    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
-                                            GlobalValue::PrivateLinkage,
-                                            PatternValue, ".memset_pattern");
-    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these.
-    GV->setAlignment(Align(16));
-    NewCall = Builder.CreateCall(MSP, {BasePtr, GV, NumBytes});
+  } else if (ForceMemsetPatternIntrinsic ||
+             isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
+    assert(isa<SCEVConstant>(StoreSizeSCEV) && "Expected constant store size");
+
+    NewCall = Builder.CreateIntrinsic(
+        Intrinsic::experimental_memset_pattern,
+        {DestInt8PtrTy, PatternValue->getType(), IntIdxTy},
+        {BasePtr, PatternValue, MemsetArg,
+         ConstantInt::getFalse(M->getContext())});
+    if (StoreAlignment)
+      cast<MemSetPatternInst>(NewCall)->setDestAlignment(*StoreAlignment);
     NewCall->setAAMetadata(AATags);
   } else {
     // Neither a memset, nor memset_pattern16

diff  --git a/llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll b/llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll
new file mode 100644
index 0000000000000..1aabba402ff9a
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/RISCV/memset-pattern.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt -passes=loop-idiom -mtriple=riscv64 < %s -S | FileCheck %s
+; RUN: opt -passes=loop-idiom -mtriple=riscv64 -loop-idiom-force-memset-pattern-intrinsic < %s -S \
+; RUN:   | FileCheck -check-prefix=CHECK-INTRIN %s
+
+define dso_local void @double_memset(ptr nocapture %p) {
+; CHECK-LABEL: @double_memset(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 [[I_07]]
+; CHECK-NEXT:    store double 3.141590e+00, ptr [[PTR1]], align 1
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_07]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+; CHECK-INTRIN-LABEL: @double_memset(
+; CHECK-INTRIN-NEXT:  entry:
+; CHECK-INTRIN-NEXT:    call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 16, i1 false)
+; CHECK-INTRIN-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK-INTRIN:       for.cond.cleanup:
+; CHECK-INTRIN-NEXT:    ret void
+; CHECK-INTRIN:       for.body:
+; CHECK-INTRIN-NEXT:    [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-INTRIN-NEXT:    [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
+; CHECK-INTRIN-NEXT:    [[INC]] = add nuw nsw i64 [[I_07]], 1
+; CHECK-INTRIN-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
+; CHECK-INTRIN-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %ptr1 = getelementptr inbounds double, ptr %p, i64 %i.07
+  store double 3.14159e+00, ptr %ptr1, align 1
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond.not = icmp eq i64 %inc, 16
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+;.
+; CHECK-INTRIN: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.

diff  --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 45af783026575..8fdaac3fdffe3 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -7,8 +7,6 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;.
 ; CHECK: @G = global i32 5
 ; CHECK: @g_50 = global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 1, i32 1, i32 1, i32 1], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
 ;.
 define void @test1(ptr %Base, i64 %Size) nounwind ssp {
 ; CHECK-LABEL: @test1(
@@ -533,7 +531,7 @@ for.end13:                                        ; preds = %for.inc10
 define void @test11_pattern(ptr nocapture %P) nounwind ssp {
 ; CHECK-LABEL: @test11_pattern(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 40000)
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[P:%.*]], i32 1, i64 10000, i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
@@ -596,7 +594,7 @@ for.end:                                          ; preds = %for.body
 define void @test13_pattern(ptr nocapture %P) nounwind ssp {
 ; CHECK-LABEL: @test13_pattern(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 80000)
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.p0.i64(ptr align 4 [[P:%.*]], ptr @G, i64 10000, i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]

diff  --git a/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
index 9bbc732f8d07d..c5bc7390ed573 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-pattern-tbaa.ll
@@ -6,15 +6,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-darwin10.0.0"
 
 
-;.
-; CHECK: @.memset_pattern = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x double] [double 3.141590e+00, double 3.141590e+00], align 16
-;.
 define dso_local void @double_memset(ptr nocapture %p) {
 ; CHECK-LABEL: @double_memset(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern, i64 128), !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 16, i1 false), !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
@@ -44,7 +39,7 @@ for.body:
 define dso_local void @struct_memset(ptr nocapture %p) {
 ; CHECK-LABEL: @struct_memset(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.1, i64 128), !tbaa [[TBAA4:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 16, i1 false), !tbaa [[TBAA4:![0-9]+]]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
@@ -73,8 +68,7 @@ for.body:
 define dso_local void @var_memset(ptr nocapture %p, i64 %len) {
 ; CHECK-LABEL: @var_memset(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[LEN:%.*]], 3
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[P:%.*]], ptr @.memset_pattern.2, i64 [[TMP0]])
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.f64.i64(ptr align 1 [[P:%.*]], double 3.141590e+00, i64 [[TMP0:%.*]], i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
@@ -82,7 +76,7 @@ define dso_local void @var_memset(ptr nocapture %p, i64 %len) {
 ; CHECK-NEXT:    [[I_07:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds double, ptr [[P]], i64 [[I_07]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_07]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[LEN]]
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ;
 entry:
@@ -116,7 +110,7 @@ for.body:
 !21 = !{!22, !20, i64 0}
 !22 = !{!"B", !20, i64 0}
 ;.
-; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
 ;.
 ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
 ; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}

diff  --git a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
index b4ce766fdd73b..4b33f30b59f80 100644
--- a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
+++ b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll
@@ -16,11 +16,6 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;}
 
 
-;.
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-;.
 define void @bar1(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-LABEL: @bar1(
 ; CHECK-NEXT:  entry:
@@ -28,8 +23,8 @@ define void @bar1(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i64 [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -82,8 +77,8 @@ define void @bar2(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern.1, i64 [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP1]], i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -142,7 +137,8 @@ define void @bar3(ptr nocapture %f, i32 %n) nounwind ssp {
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP1]], [[TMP4]]
 ; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[F:%.*]], i64 [[TMP5]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[UGLYGEP]], ptr @.memset_pattern.2, i64 [[TMP1]])
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[UGLYGEP]], i32 2, i64 [[TMP7]], i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -292,5 +288,5 @@ for.end:                                          ; preds = %for.end.loopexit, %
 }
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
 ;.

diff  --git a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll
index bca922094eebb..42db236f1d6b9 100644
--- a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll
+++ b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll
@@ -11,9 +11,6 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;    f[i+1] = 0;
 ;  }
 ;}
-;.
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-;.
 define void @test(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
@@ -81,9 +78,9 @@ define void @test_pattern(ptr %f, i32 %n) nounwind ssp {
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[MUL]], -1
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 8
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i32 [[TMP3]])
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP2]], 2
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr align 4 [[F:%.*]], i32 2, i32 [[TMP3]], i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -126,5 +123,4 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.

diff  --git a/llvm/test/Transforms/LoopIdiom/unroll.ll b/llvm/test/Transforms/LoopIdiom/unroll.ll
index 621082b9eeb6f..63a960e4f3b7c 100644
--- a/llvm/test/Transforms/LoopIdiom/unroll.ll
+++ b/llvm/test/Transforms/LoopIdiom/unroll.ll
@@ -11,9 +11,6 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;    f[i+1] = 0;
 ;  }
 ;}
-;.
-; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
-;.
 define void @test(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
@@ -84,9 +81,9 @@ define void @test_pattern(ptr %f, i32 %n) nounwind ssp {
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[MUL]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 3
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], 8
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[F:%.*]], ptr @.memset_pattern, i64 [[TMP4]])
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 2
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 [[F:%.*]], i32 2, i64 [[TMP4]], i1 false)
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
@@ -130,5 +127,4 @@ for.end:                                          ; preds = %for.end.loopexit, %
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind ssp }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.


        


More information about the llvm-commits mailing list