[llvm] ff5095d - [TTI] Provide a cost for memset_pattern which matches the libcall (#139978)

Wed May 28 09:39:58 PDT 2025

Author: Philip Reames
Date: 2025-05-28T09:39:55-07:00
New Revision: ff5095db93762412a1c09225b18f80b06b99ff68

URL: https://github.com/llvm/llvm-project/commit/ff5095db93762412a1c09225b18f80b06b99ff68
DIFF: https://github.com/llvm/llvm-project/commit/ff5095db93762412a1c09225b18f80b06b99ff68.diff

LOG: [TTI] Provide a cost for memset_pattern which matches the libcall (#139978)

The motivation is that differences in unrolling were noticed when trying
to switch from the libcall to the intrinsic. There are likely also
differences not yet noticed in other cost-based decisions - such as
inlining, and possibly vectorization.

Neither cost is a good, well-considered cost, but for the moment, let's
have them be equal to simplify migration. We can come back and refine
this once we have it being exercised by default.

Added: 
    llvm/test/Analysis/CostModel/X86/memset-pattern.ll

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index ff8778168686d..574152e254f15 100644

--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2408,6 +2408,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                                           CmpInst::ICMP_ULT, CostKind);
       return Cost;
     }
+    case Intrinsic::experimental_memset_pattern:
+      // This cost is set to match the cost of the memset_pattern16 libcall.
+      // It should likely be re-evaluated after migration to this intrinsic
+      // is complete.
+      return TTI::TCC_Basic * 4;
     case Intrinsic::abs:
       ISD = ISD::ABS;
       break;

diff  --git a/llvm/test/Analysis/CostModel/X86/memset-pattern.ll b/llvm/test/Analysis/CostModel/X86/memset-pattern.ll
new file mode 100644
index 0000000000000..aa0c6efdf34fa
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/memset-pattern.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -mtriple=x86_64-apple-darwin10.0.0 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+@.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+
+define void @via_libcall(ptr %p) nounwind ssp {
+; CHECK-LABEL: 'via_libcall'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @memset_pattern4(ptr %p, ptr @.memset_pattern, i64 200)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @memset_pattern8(ptr %p, ptr @.memset_pattern, i64 200)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @memset_pattern16(ptr %p, ptr @.memset_pattern, i64 200)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @memset_pattern4(ptr %p, ptr @.memset_pattern, i64 200)
+  call void @memset_pattern8(ptr %p, ptr @.memset_pattern, i64 200)
+  call void @memset_pattern16(ptr %p, ptr @.memset_pattern, i64 200)
+  ret void
+}
+
+declare void @memset_pattern4(ptr, ptr, i64)
+declare void @memset_pattern8(ptr, ptr, i64)
+declare void @memset_pattern16(ptr, ptr, i64)
+
+define void @via_intrinsic(ptr %p) {
+; CHECK-LABEL: 'via_intrinsic'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i16.i64(ptr align 4 %p, i16 2, i64 100, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr align 4 %p, i32 2, i64 50, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i64.i64(ptr align 4 %p, i64 2, i64 25, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.memset.pattern.p0.i128.i64(ptr align 4 %p, i128 2, i64 12, i1 false)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  call void @llvm.experimental.memset.pattern(ptr align 4 %p, i16 2, i64 100, i1 false)
+  call void @llvm.experimental.memset.pattern(ptr align 4 %p, i32 2, i64 50, i1 false)
+  call void @llvm.experimental.memset.pattern(ptr align 4 %p, i64 2, i64 25, i1 false)
+  call void @llvm.experimental.memset.pattern(ptr align 4 %p, i128 2, i64 12, i1 false)
+  ret void
+}