[llvm] [MemoryLocation] Teach MemoryLocation about llvm.experimental.memset.pattern (PR #120421)

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 18 05:10:03 PST 2024


https://github.com/asb created https://github.com/llvm/llvm-project/pull/120421

Stacks on top of #120420.

This allows alias analysis of the intrinsic to be of the same quality as for the libcall, which we want in order to move LoopIdiomRecognize over to selecting the intrinsic.
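
For illustration (a sketch based on the BasicAA test added later in this
PR), the intrinsic writes the pattern the given number of times, so when
the length is a constant the modified range is precisely
length * store-size-of(pattern type):

  ; 17 copies of an i32 pattern => precisely 68 bytes written from %a
  call void @llvm.experimental.memset.pattern(ptr %a, i32 %pattern, i64 17, i1 false)
  %a.gep.129 = getelementptr i8, ptr %a, i32 129
  ; With the MemoryLocation change, BasicAA can conclude this store is
  ; NoModRef with respect to the intrinsic call above.
  store i8 1, ptr %a.gep.129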

From 442c72fa70af7bc4cc64b8866c918e3b89a01981 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 18 Dec 2024 09:56:55 +0000
Subject: [PATCH 1/3] [PreISelIntrinsicLowering] Produce a memset_pattern16
 libcall for llvm.experimental.memset.pattern when available

This is to enable a transition of LoopIdiomRecognize to selecting the
llvm.experimental.memset.pattern intrinsic as requested in #118632 (as
opposed to supporting selection of either the libcall or the intrinsic).
As such, although it _is_ a FIXME to add cost-based logic for choosing
between lowering to the libcall (when available) and expanding directly,
lacking such logic is helpful at this stage in order to minimise any
potential codegen changes in this transition.
---
 llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 90 ++++++++++++++++++-
 .../X86/memset-pattern.ll                     | 69 ++++++++++++++
 2 files changed, 158 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll

diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 4a3d1673c2a7c1..9067f29faa7b0c 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
 
 using namespace llvm;
@@ -232,6 +233,59 @@ static bool canEmitLibcall(const TargetMachine *TM, Function *F,
   return TLI->getLibcallName(LC) != nullptr;
 }
 
+// Return a value appropriate for use with the memset_pattern16 libcall, if
+// possible and if we know how. (Adapted from the equivalent helper in
+// LoopIdiomRecognize.)
+static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
+                                         const TargetLibraryInfo &TLI) {
+  // FIXME: This could check for UndefValue because it can be merged into any
+  // other valid pattern.
+
+  // Don't emit libcalls if a non-default address space is being used.
+  if (Inst->getRawDest()->getType()->getPointerAddressSpace() != 0)
+    return nullptr;
+
+  Value *V = Inst->getValue();
+  const DataLayout &DL = Inst->getDataLayout();
+  Module *M = Inst->getModule();
+
+  if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16))
+    return nullptr;
+
+  // If the value isn't a constant, we can't promote it to being in a constant
+  // array.  We could theoretically do a store to an alloca or something, but
+  // that doesn't seem worthwhile.
+  Constant *C = dyn_cast<Constant>(V);
+  if (!C || isa<ConstantExpr>(C))
+    return nullptr;
+
+  // Only handle simple values that are a power of two bytes in size.
+  uint64_t Size = DL.getTypeSizeInBits(V->getType());
+  if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
+    return nullptr;
+
+  // Don't care enough about darwin/ppc to implement this.
+  if (DL.isBigEndian())
+    return nullptr;
+
+  // Convert to size in bytes.
+  Size /= 8;
+
+  // TODO: If C is larger than 16 bytes, we can try slicing it in half to see
+  // if the top and bottom are the same (e.g. for vectors and large integers).
+  if (Size > 16)
+    return nullptr;
+
+  // If the constant is exactly 16 bytes, just use it.
+  if (Size == 16)
+    return C;
+
+  // Otherwise, we'll use an array of the constants.
+  unsigned ArraySize = 16 / Size;
+  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+  return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
+}
+
 // TODO: Handle atomic memcpy and memcpy.inline
 // TODO: Pass ScalarEvolution
 bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
@@ -322,7 +376,41 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
     }
     case Intrinsic::experimental_memset_pattern: {
       auto *Memset = cast<MemSetPatternInst>(Inst);
-      expandMemSetPatternAsLoop(Memset);
+      const TargetLibraryInfo &TLI = LookupTLI(*Memset->getFunction());
+      if (Constant *PatternValue = getMemSetPattern16Value(Memset, TLI)) {
+        // FIXME: There is currently no profitability calculation for emitting
+        // the libcall vs expanding the memset.pattern directly.
+        IRBuilder<> Builder(Inst);
+        Module *M = Memset->getModule();
+        const DataLayout &DL = Memset->getDataLayout();
+
+        StringRef FuncName = "memset_pattern16";
+        FunctionCallee MSP = getOrInsertLibFunc(
+            M, TLI, LibFunc_memset_pattern16, Builder.getVoidTy(),
+            Memset->getRawDest()->getType(), Builder.getPtrTy(),
+            Memset->getLength()->getType());
+        inferNonMandatoryLibFuncAttrs(M, FuncName, TLI);
+
+        // Form a memset_pattern16 call. PatternValue is known to be a
+        // 16-byte constant (possibly an array of smaller constants). Put
+        // the value into a mergeable global.
+        GlobalVariable *GV = new GlobalVariable(
+            *M, PatternValue->getType(), true, GlobalValue::PrivateLinkage,
+            PatternValue, ".memset_pattern");
+        GV->setUnnamedAddr(
+            GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+        GV->setAlignment(Align(16));
+        Value *PatternPtr = GV;
+        Value *NumBytes = Builder.CreateMul(
+            Builder.getInt64(
+                DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8),
+            Memset->getLength());
+        CallInst *MemsetPattern16Call = Builder.CreateCall(
+            MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
+        MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
+      } else {
+        expandMemSetPatternAsLoop(Memset);
+      }
       Changed = true;
       Memset->eraseFromParent();
       break;
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
new file mode 100644
index 00000000000000..6d5f5b8a6d4fb2
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
+
+define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
+; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK:       [[LOADSTORELOOP]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    store i128 [[VALUE]], ptr [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
+  ret void
+}
+
+define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1(
+; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 16)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+  ret void
+}
+
+define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1_nz_as(
+; CHECK-SAME: ptr addrspace(1) [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK:       [[LOADSTORELOOP]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[A]], i64 [[TMP1]]
+; CHECK-NEXT:    store i128 -113427455635030943652277463699152839203, ptr addrspace(1) [[TMP2]], align 1
+; CHECK-NEXT:    [[TMP3]] = add i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.experimental.memset.pattern(ptr addrspace(1) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+  ret void
+}
+
+define void @memset_pattern_i128_16(ptr %a) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_16(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 256)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 0)
+  ret void
+}
+
+define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_x(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 16, [[X]]
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 0)
+  ret void
+}

From 8ecb682dbd9ad12c9946b9b3f442eab0e89d06d4 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 18 Dec 2024 13:02:17 +0000
Subject: [PATCH 2/3] [test] Pre-commit llvm.experimental.memset.pattern tests
 prior to MemoryLocation changes

The next patch will teach MemoryLocation about
llvm.experimental.memset.pattern.
---
 llvm/test/Analysis/BasicAA/memset-pattern.ll  | 18 +++++
 .../memory-intrinsics-sizes.ll                | 65 +++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 llvm/test/Analysis/BasicAA/memset-pattern.ll

diff --git a/llvm/test/Analysis/BasicAA/memset-pattern.ll b/llvm/test/Analysis/BasicAA/memset-pattern.ll
new file mode 100644
index 00000000000000..33d3d125b5794e
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/memset-pattern.ll
@@ -0,0 +1,18 @@
+; RUN: opt -mtriple=x86_64 -aa-pipeline=basic-aa -passes=inferattrs,aa-eval -print-all-alias-modref-info -disable-output 2>&1 %s | FileCheck %s
+
+define void @test_memset_pattern4_const_size(ptr noalias %a, i32 %pattern) {
+; CHECK-LABEL: Function: test_memset_pattern4_const_size
+; CHECK:      Just Mod:  Ptr: i8* %a	<->  call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
+; CHECK-NEXT: Just Mod:  Ptr: i8* %a.gep.1	<->  call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
+; CHECK-NEXT: Just Mod:  Ptr: i8* %a.gep.129	<->  call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
+
+entry:
+  load i8, ptr %a
+  call void @llvm.experimental.memset.pattern(ptr %a, i32 %pattern, i64 17, i1 0)
+  %a.gep.1 = getelementptr i8, ptr %a, i32 1
+  store i8 0, ptr %a.gep.1
+  %a.gep.129 = getelementptr i8, ptr %a, i32 129
+  store i8 1, ptr %a.gep.129
+
+  ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll b/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll
index 09d8bbf3c93bcf..c6a2cf911b4989 100644
--- a/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll
@@ -66,6 +66,71 @@ define void @memset_and_store_2(ptr %ptr, i64 %len) {
   ret void
 }
 
+define void @memset_pattern_equal_size_values(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_equal_size_values(
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+  ret void
+}
+
+define void @memset_pattern_different_size_values_1(ptr %ptr, i64 %len.1, i64 %len.2) {
+; CHECK-LABEL: @memset_pattern_different_size_values_1(
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN_1:%.*]], i1 false)
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 [[LEN_2:%.*]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len.1, i1 false)
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len.2, i1 false)
+  ret void
+}
+
+define void @memset_pattern_different_size_values_2(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_different_size_values_2(
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 100, i1 false)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 100, i1 false)
+  ret void
+}
+
+define void @memset_pattern_different_size_values_3(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_different_size_values_3(
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 100, i1 false)
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 100, i1 false)
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+  ret void
+}
+
+define void @memset_pattern_and_store_1(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_and_store_1(
+; CHECK-NEXT:    store i64 123, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    ret void
+;
+  store i64 123, ptr %ptr
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+  ret void
+}
+
+define void @memset_pattern_and_store_2(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_and_store_2(
+; CHECK-NEXT:    call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    store i64 123, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+  store i64 123, ptr %ptr
+  ret void
+}
+
 define void @memcpy_equal_size_values(ptr noalias %src, ptr noalias %dst, i64 %len) {
 ; CHECK-LABEL: @memcpy_equal_size_values(
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[LEN:%.*]], i1 false)

From 4549a1fab4dcb523b71bb8be66bbaa48c9467050 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 18 Dec 2024 13:04:06 +0000
Subject: [PATCH 3/3] [MemoryLocation] Teach MemoryLocation about
 llvm.experimental.memset.pattern

---
 llvm/lib/Analysis/MemoryLocation.cpp         | 12 ++++++++++++
 llvm/test/Analysis/BasicAA/memset-pattern.ll |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index b664b54c044f54..c9f3f93fabf58c 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -183,6 +183,18 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
                               AATags);
       return MemoryLocation::getAfter(Arg, AATags);
 
+    case Intrinsic::experimental_memset_pattern:
+      assert((ArgIdx == 0 || ArgIdx == 1) &&
+             "Invalid argument index for memory intrinsic");
+      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+        return MemoryLocation(
+            Arg,
+            LocationSize::precise(
+                LenCI->getZExtValue() *
+                DL.getTypeStoreSize(II->getArgOperand(1)->getType())),
+            AATags);
+      return MemoryLocation::getAfter(Arg, AATags);
+
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
     case Intrinsic::invariant_start:
diff --git a/llvm/test/Analysis/BasicAA/memset-pattern.ll b/llvm/test/Analysis/BasicAA/memset-pattern.ll
index 33d3d125b5794e..aaa605db0eb26a 100644
--- a/llvm/test/Analysis/BasicAA/memset-pattern.ll
+++ b/llvm/test/Analysis/BasicAA/memset-pattern.ll
@@ -4,7 +4,7 @@ define void @test_memset_pattern4_const_size(ptr noalias %a, i32 %pattern) {
 ; CHECK-LABEL: Function: test_memset_pattern4_const_size
 ; CHECK:      Just Mod:  Ptr: i8* %a	<->  call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
 ; CHECK-NEXT: Just Mod:  Ptr: i8* %a.gep.1	<->  call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
-; CHECK-NEXT: Just Mod:  Ptr: i8* %a.gep.129	<->  call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
+; CHECK-NEXT: NoModRef:  Ptr: i8* %a.gep.129	<->  call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
 
 entry:
   load i8, ptr %a


