[llvm] [MemoryLocation] Teach MemoryLocation about llvm.experimental.memset.pattern (PR #120421)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 18 05:10:38 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Alex Bradbury (asb)
<details>
<summary>Changes</summary>
Stacks on top of #<!-- -->120420.
This allows alias analysis of the intrinsic to be of the same quality as for the libcall, which we want in order to move LoopIdiomRecognize over to selecting the intrinsic.
---
Full diff: https://github.com/llvm/llvm-project/pull/120421.diff
5 Files Affected:
- (modified) llvm/lib/Analysis/MemoryLocation.cpp (+12)
- (modified) llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp (+89-1)
- (added) llvm/test/Analysis/BasicAA/memset-pattern.ll (+18)
- (modified) llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll (+65)
- (added) llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll (+69)
``````````diff
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index b664b54c044f54..c9f3f93fabf58c 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -183,6 +183,18 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
AATags);
return MemoryLocation::getAfter(Arg, AATags);
+ case Intrinsic::experimental_memset_pattern:
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memory intrinsic");
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ return MemoryLocation(
+ Arg,
+ LocationSize::precise(
+ LenCI->getZExtValue() *
+ DL.getTypeStoreSize(II->getArgOperand(1)->getType())),
+ AATags);
+ return MemoryLocation::getAfter(Arg, AATags);
+
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 4a3d1673c2a7c1..9067f29faa7b0c 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;
@@ -232,6 +233,59 @@ static bool canEmitLibcall(const TargetMachine *TM, Function *F,
return TLI->getLibcallName(LC) != nullptr;
}
+// Return a value appropriate for use with the memset_pattern16 libcall, if
+// possible and if we know how. (Adapted from equivalent helper in
+// LoopIdiomRecognize).
+//
+// Returns nullptr when the libcall cannot or should not be used (non-default
+// destination address space, target cannot emit memset_pattern16, the pattern
+// operand is not a plain Constant, its size is not a power-of-two number of
+// bytes in (0, 16], or the target is big-endian). Otherwise returns a
+// constant that is exactly 16 bytes wide: the pattern itself, or an array of
+// repeated copies of it.
+static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
+ const TargetLibraryInfo &TLI) {
+ // FIXME: This could check for UndefValue because it can be merged into any
+ // other valid pattern.
+
+ // Don't emit libcalls if a non-default address space is being used.
+ if (Inst->getRawDest()->getType()->getPointerAddressSpace() != 0)
+ return nullptr;
+
+ Value *V = Inst->getValue();
+ const DataLayout &DL = Inst->getDataLayout();
+ Module *M = Inst->getModule();
+
+ // The memset_pattern16 libcall is only available on some targets (it is an
+ // Apple libc extension); bail out if this target cannot emit it.
+ if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16))
+ return nullptr;
+
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C || isa<ConstantExpr>(C))
+ return nullptr;
+
+ // Only handle simple values that are a power of two bytes in size.
+ // (Size & 7) rejects sizes that are not a whole number of bytes;
+ // (Size & (Size - 1)) is nonzero unless Size is a power of two.
+ uint64_t Size = DL.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
+ return nullptr;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (DL.isBigEndian())
+ return nullptr;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16)
+ return nullptr;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16)
+ return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16 / Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
+}
+
// TODO: Handle atomic memcpy and memcpy.inline
// TODO: Pass ScalarEvolution
bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
@@ -322,7 +376,41 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
}
case Intrinsic::experimental_memset_pattern: {
auto *Memset = cast<MemSetPatternInst>(Inst);
- expandMemSetPatternAsLoop(Memset);
+ const TargetLibraryInfo &TLI = LookupTLI(*Memset->getFunction());
+ if (Constant *PatternValue = getMemSetPattern16Value(Memset, TLI)) {
+ // FIXME: There is currently no profitability calculation for emitting
+ // the libcall vs expanding the memset.pattern directly.
+ IRBuilder<> Builder(Inst);
+ Module *M = Memset->getModule();
+ const DataLayout &DL = Memset->getDataLayout();
+
+ StringRef FuncName = "memset_pattern16";
+ FunctionCallee MSP = getOrInsertLibFunc(
+ M, TLI, LibFunc_memset_pattern16, Builder.getVoidTy(),
+ Memset->getRawDest()->getType(), Builder.getPtrTy(),
+ Memset->getLength()->getType());
+ inferNonMandatoryLibFuncAttrs(M, FuncName, TLI);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known
+ // to be a constant array of 16 bytes. Put the value into a mergeable
+ // global.
+ GlobalVariable *GV = new GlobalVariable(
+ *M, PatternValue->getType(), true, GlobalValue::PrivateLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(
+ GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+ GV->setAlignment(Align(16));
+ Value *PatternPtr = GV;
+ Value *NumBytes = Builder.CreateMul(
+ Builder.getInt64(
+ DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8),
+ Memset->getLength());
+ CallInst *MemsetPattern16Call = Builder.CreateCall(
+ MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
+ MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
+ } else {
+ expandMemSetPatternAsLoop(Memset);
+ }
Changed = true;
Memset->eraseFromParent();
break;
diff --git a/llvm/test/Analysis/BasicAA/memset-pattern.ll b/llvm/test/Analysis/BasicAA/memset-pattern.ll
new file mode 100644
index 00000000000000..aaa605db0eb26a
--- /dev/null
+++ b/llvm/test/Analysis/BasicAA/memset-pattern.ll
@@ -0,0 +1,18 @@
+; RUN: opt -mtriple=x86_64 -aa-pipeline=basic-aa -passes=inferattrs,aa-eval -print-all-alias-modref-info -disable-output 2>&1 %s | FileCheck %s
+
+define void @test_memset_pattern4_const_size(ptr noalias %a, i32 %pattern) {
+; CHECK-LABEL: Function: test_memset_pattern4_const_size
+; CHECK: Just Mod: Ptr: i8* %a <-> call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
+; CHECK-NEXT: Just Mod: Ptr: i8* %a.gep.1 <-> call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
+; CHECK-NEXT: NoModRef: Ptr: i8* %a.gep.129 <-> call void @llvm.experimental.memset.pattern.p0.i32.i64(ptr %a, i32 %pattern, i64 17, i1 false)
+
+entry:
+ load i8, ptr %a
+ call void @llvm.experimental.memset.pattern(ptr %a, i32 %pattern, i64 17, i1 0)
+ %a.gep.1 = getelementptr i8, ptr %a, i32 1
+ store i8 0, ptr %a.gep.1
+ %a.gep.129 = getelementptr i8, ptr %a, i32 129
+ store i8 1, ptr %a.gep.129
+
+ ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll b/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll
index 09d8bbf3c93bcf..c6a2cf911b4989 100644
--- a/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/memory-intrinsics-sizes.ll
@@ -66,6 +66,71 @@ define void @memset_and_store_2(ptr %ptr, i64 %len) {
ret void
}
+define void @memset_pattern_equal_size_values(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_equal_size_values(
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+ ret void
+}
+
+define void @memset_pattern_different_size_values_1(ptr %ptr, i64 %len.1, i64 %len.2) {
+; CHECK-LABEL: @memset_pattern_different_size_values_1(
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN_1:%.*]], i1 false)
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 [[LEN_2:%.*]], i1 false)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len.1, i1 false)
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len.2, i1 false)
+ ret void
+}
+
+define void @memset_pattern_different_size_values_2(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_different_size_values_2(
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 100, i1 false)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 100, i1 false)
+ ret void
+}
+
+define void @memset_pattern_different_size_values_3(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_different_size_values_3(
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 100, i1 false)
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 100, i1 false)
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+ ret void
+}
+
+define void @memset_pattern_and_store_1(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_and_store_1(
+; CHECK-NEXT: store i64 123, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT: ret void
+;
+ store i64 123, ptr %ptr
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+ ret void
+}
+
+define void @memset_pattern_and_store_2(ptr %ptr, i64 %len) {
+; CHECK-LABEL: @memset_pattern_and_store_2(
+; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 [[PTR:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT: store i64 123, ptr [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ call void @llvm.experimental.memset.pattern.p0.i8.i64(ptr align 1 %ptr, i8 0, i64 %len, i1 false)
+ store i64 123, ptr %ptr
+ ret void
+}
+
define void @memcpy_equal_size_values(ptr noalias %src, ptr noalias %dst, i64 %len) {
; CHECK-LABEL: @memcpy_equal_size_values(
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 [[LEN:%.*]], i1 false)
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
new file mode 100644
index 00000000000000..6d5f5b8a6d4fb2
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
+
+define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
+; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK: [[LOADSTORELOOP]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
+; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK: [[SPLIT]]:
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1(
+; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 16)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1_nz_as(
+; CHECK-SAME: ptr addrspace(1) [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK: [[LOADSTORELOOP]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[A]], i64 [[TMP1]]
+; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr addrspace(1) [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
+; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK: [[SPLIT]]:
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr addrspace(1) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_16(ptr %a) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_16(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 256)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_x(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 0)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/120421
More information about the llvm-commits
mailing list