[llvm] [PreISelIntrinsicLowering] Produce a memset_pattern16 libcall for llvm.experimental.memset.pattern when available (PR #120420)
Alex Bradbury via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 23:08:03 PST 2025
https://github.com/asb updated https://github.com/llvm/llvm-project/pull/120420
>From 442c72fa70af7bc4cc64b8866c918e3b89a01981 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 18 Dec 2024 09:56:55 +0000
Subject: [PATCH 01/14] [PreISelIntrinsicLowering] Produce a memset_pattern16
libcall for llvm.experimental.memset.pattern when available
This is to enable a transition of LoopIdiomRecognize to selecting the
llvm.experimental.memset.pattern intrinsic, as requested in #118632
(rather than having it support selecting either the libcall or the
intrinsic). As such, although adding costing considerations on whether
to lower to the libcall (when available) or expand directly _is_ a
FIXME, lacking such logic is helpful at this stage in order to minimise
any potential code gen changes in this transition.
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 90 ++++++++++++++++++-
.../X86/memset-pattern.ll | 69 ++++++++++++++
2 files changed, 158 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 4a3d1673c2a7c1..9067f29faa7b0c 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;
@@ -232,6 +233,59 @@ static bool canEmitLibcall(const TargetMachine *TM, Function *F,
return TLI->getLibcallName(LC) != nullptr;
}
+// Return a value appropriate for use with the memset_pattern16 libcall, if
+// possible and if we know how. (Adapted from equivalent helper in
+// LoopIdiomRecognize).
+static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
+ const TargetLibraryInfo &TLI) {
+ // FIXME: This could check for UndefValue because it can be merged into any
+ // other valid pattern.
+
+ // Don't emit libcalls if a non-default address space is being used.
+ if (Inst->getRawDest()->getType()->getPointerAddressSpace() != 0)
+ return nullptr;
+
+ Value *V = Inst->getValue();
+ const DataLayout &DL = Inst->getDataLayout();
+ Module *M = Inst->getModule();
+
+ if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16))
+ return nullptr;
+
+ // If the value isn't a constant, we can't promote it to being in a constant
+ // array. We could theoretically do a store to an alloca or something, but
+ // that doesn't seem worthwhile.
+ Constant *C = dyn_cast<Constant>(V);
+ if (!C || isa<ConstantExpr>(C))
+ return nullptr;
+
+ // Only handle simple values that are a power of two bytes in size.
+ uint64_t Size = DL.getTypeSizeInBits(V->getType());
+ if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
+ return nullptr;
+
+ // Don't care enough about darwin/ppc to implement this.
+ if (DL.isBigEndian())
+ return nullptr;
+
+ // Convert to size in bytes.
+ Size /= 8;
+
+ // TODO: If C is larger than 16 bytes, we can try slicing it in half to see
+ // if the top and bottom are the same (e.g. for vectors and large integers).
+ if (Size > 16)
+ return nullptr;
+
+ // If the constant is exactly 16 bytes, just use it.
+ if (Size == 16)
+ return C;
+
+ // Otherwise, we'll use an array of the constants.
+ unsigned ArraySize = 16 / Size;
+ ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
+}
+
// TODO: Handle atomic memcpy and memcpy.inline
// TODO: Pass ScalarEvolution
bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
@@ -322,7 +376,41 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
}
case Intrinsic::experimental_memset_pattern: {
auto *Memset = cast<MemSetPatternInst>(Inst);
- expandMemSetPatternAsLoop(Memset);
+ const TargetLibraryInfo &TLI = LookupTLI(*Memset->getFunction());
+ if (Constant *PatternValue = getMemSetPattern16Value(Memset, TLI)) {
+ // FIXME: There is currently no profitability calculation for emitting
+ // the libcall vs expanding the memset.pattern directly.
+ IRBuilder<> Builder(Inst);
+ Module *M = Memset->getModule();
+ const DataLayout &DL = Memset->getDataLayout();
+
+ StringRef FuncName = "memset_pattern16";
+ FunctionCallee MSP = getOrInsertLibFunc(
+ M, TLI, LibFunc_memset_pattern16, Builder.getVoidTy(),
+ Memset->getRawDest()->getType(), Builder.getPtrTy(),
+ Memset->getLength()->getType());
+ inferNonMandatoryLibFuncAttrs(M, FuncName, TLI);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known
+ // to be an constant array of 16-bytes. Put the value into a mergable
+ // global.
+ GlobalVariable *GV = new GlobalVariable(
+ *M, PatternValue->getType(), true, GlobalValue::PrivateLinkage,
+ PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(
+ GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+ GV->setAlignment(Align(16));
+ Value *PatternPtr = GV;
+ Value *NumBytes = Builder.CreateMul(
+ Builder.getInt64(
+ DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8),
+ Memset->getLength());
+ CallInst *MemsetPattern16Call = Builder.CreateCall(
+ MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
+ MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
+ } else {
+ expandMemSetPatternAsLoop(Memset);
+ }
Changed = true;
Memset->eraseFromParent();
break;
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
new file mode 100644
index 00000000000000..6d5f5b8a6d4fb2
--- /dev/null
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
+
+define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
+; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK: [[LOADSTORELOOP]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
+; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK: [[SPLIT]]:
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1(
+; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 16)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1_nz_as(
+; CHECK-SAME: ptr addrspace(1) [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK: [[LOADSTORELOOP]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr addrspace(1) [[A]], i64 [[TMP1]]
+; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr addrspace(1) [[TMP2]], align 1
+; CHECK-NEXT: [[TMP3]] = add i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
+; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK: [[SPLIT]]:
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr addrspace(1) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_16(ptr %a) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_16(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 256)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_x(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 0)
+ ret void
+}
>From 071a399355b754a9fc4a6ba9371cdb307a093136 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:18 +0000
Subject: [PATCH 02/14] Use typeSizeEqualsStoreSize and isPowerof2
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 9067f29faa7b0c..296312ea4de8b7 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -246,6 +246,7 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
return nullptr;
Value *V = Inst->getValue();
+ Type *VTy = V->getType();
const DataLayout &DL = Inst->getDataLayout();
Module *M = Inst->getModule();
@@ -260,8 +261,8 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
return nullptr;
// Only handle simple values that are a power of two bytes in size.
- uint64_t Size = DL.getTypeSizeInBits(V->getType());
- if (Size == 0 || (Size & 7) || (Size & (Size - 1)))
+ uint64_t Size = DL.getTypeSizeInBits(VTy);
+ if (!DL.typeSizeEqualsStoreSize(VTy) || !isPowerOf2_64(Size))
return nullptr;
// Don't care enough about darwin/ppc to implement this.
>From 5c92d38549d62efe8678c1711e52238954ecf3a0 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:18 +0000
Subject: [PATCH 03/14] Add test cases to show 'splatting' support for
generating i128 constants for narrower memset.pattern
---
.../X86/memset-pattern.ll | 28 +++++++++++++++++--
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 6d5f5b8a6d4fb2..51276c2419d794 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -22,7 +22,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 16)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 16)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
@@ -50,7 +50,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
define void @memset_pattern_i128_16(ptr %a) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 256)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 256)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 0)
@@ -61,9 +61,31 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 0)
ret void
}
+
+define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i16_x(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 [[TMP1]])
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 0)
+ ret void
+}
+
+define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_x(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP1]])
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xaaaabbbbccccdddd, i64 %x, i1 0)
+ ret void
+}
>From eee7696feccc3fe23b591ff3d340087fafbd0ac6 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:18 +0000
Subject: [PATCH 04/14] i1 0 => i1 false
---
.../PreISelIntrinsicLowering/X86/memset-pattern.ll | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 51276c2419d794..f45d7e90815fab 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -15,7 +15,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 false)
ret void
}
@@ -25,7 +25,7 @@ define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 16)
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
ret void
}
@@ -43,7 +43,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr addrspace(1) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 0)
+ tail call void @llvm.experimental.memset.pattern(ptr addrspace(1) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
ret void
}
@@ -53,7 +53,7 @@ define void @memset_pattern_i128_16(ptr %a) nounwind {
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 256)
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 0)
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
ret void
}
@@ -64,7 +64,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 0)
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
ret void
}
@@ -75,7 +75,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 0)
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
ret void
}
@@ -86,6 +86,6 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xaaaabbbbccccdddd, i64 %x, i1 0)
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xaaaabbbbccccdddd, i64 %x, i1 false)
ret void
}
>From 70ebad30b522ca1e32a3f350789866ab79467d21 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:19 +0000
Subject: [PATCH 05/14] Add --check-globals to memset-pattern.ll test and fix
typo in i64 splatting test
---
.../X86/memset-pattern.ll | 22 ++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index f45d7e90815fab..27f70e92c09bf0 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -1,6 +1,13 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
+;.
+; CHECK: @.memset_pattern = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.3 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+;.
define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -72,7 +79,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i16_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
@@ -82,10 +89,15 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i64_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP1]])
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 8, [[X]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
- tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xaaaabbbbccccdddd, i64 %x, i1 false)
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i64 %x, i1 false)
ret void
}
+;.
+; CHECK: attributes #[[ATTR0]] = { nounwind }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind willreturn memory(argmem: readwrite) }
+;.
>From 0244831ebdf33084679d09f3294883730440f43f Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:19 +0000
Subject: [PATCH 06/14] Add test coverage for non-zero AS and an additional
assert
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 6 ++++--
.../X86/memset-pattern.ll | 19 +++++++++++++++++++
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 296312ea4de8b7..e9706d4915647a 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -395,9 +395,11 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
// Otherwise we should form a memset_pattern16. PatternValue is known
// to be an constant array of 16-bytes. Put the value into a mergable
// global.
+ assert(Memset->getRawDest()->getType()->getPointerAddressSpace() == 0 &&
+ "Should have skipped if non-zero AS");
GlobalVariable *GV = new GlobalVariable(
- *M, PatternValue->getType(), true, GlobalValue::PrivateLinkage,
- PatternValue, ".memset_pattern");
+ *M, PatternValue->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
GV->setUnnamedAddr(
GlobalValue::UnnamedAddr::Global); // Ok to merge these.
GV->setAlignment(Align(16));
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 27f70e92c09bf0..098cd6b455e06f 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -75,6 +75,25 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
ret void
}
+define void @memset_pattern_i128_x_nonzero_as(ptr addrspace(10) %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_x_nonzero_as(
+; CHECK-SAME: ptr addrspace(10) [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK: [[LOADSTORELOOP]]:
+; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i128, ptr addrspace(10) [[A]], i64 [[TMP2]]
+; CHECK-NEXT: store i128 -113427455635030943652277463699152839203, ptr addrspace(10) [[TMP3]], align 1
+; CHECK-NEXT: [[TMP4]] = add i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK: [[SPLIT]]:
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr addrspace(10) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
+ ret void
+}
+
define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i16_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
>From 9b6d15e704e25e33601d2f37adebc41582f62e1c Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:19 +0000
Subject: [PATCH 07/14] Add TODO about preserving call site attributes
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index e9706d4915647a..6c63d4f65968d8 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -408,6 +408,8 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
Builder.getInt64(
DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8),
Memset->getLength());
+ // TODO: Consider attempting to preserve call site attributes. See
+ // e.g. mergeAttributesAndFlags in SimplifyLibCalls.
CallInst *MemsetPattern16Call = Builder.CreateCall(
MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
>From 25960aa585edeb816998abf0ce82cd9075761336 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:19 +0000
Subject: [PATCH 08/14] Add test for preservation of tbaa metadata
---
.../X86/memset-pattern.ll | 35 +++++++++++++++----
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 098cd6b455e06f..95e7ca053ec222 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -3,10 +3,11 @@
;.
; CHECK: @.memset_pattern = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
-; CHECK: @.memset_pattern.1 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
; CHECK: @.memset_pattern.3 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.5 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
;.
define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
@@ -29,7 +30,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 16)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 16)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -57,7 +58,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
define void @memset_pattern_i128_16(ptr %a) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 256)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 256)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
@@ -68,7 +69,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
@@ -98,7 +99,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i16_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
@@ -115,8 +116,30 @@ define void @memset_pattern_i64_x(ptr %a, i64 %x) nounwind {
tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0xaaaabbbbccccdddd, i64 %x, i1 false)
ret void
}
+
+; Demonstrate that TBAA metadata is preserved.
+define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_128_tbaa(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.1, i64 1024), !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i64 u0x400921fb54442d18, i64 128, i1 false), !tbaa !5
+ ret void
+}
+
+!5 = !{!6, !6, i64 0}
+!6 = !{!"double", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C++ TBAA"}
+
;.
; CHECK: attributes #[[ATTR0]] = { nounwind }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind willreturn memory(argmem: readwrite) }
;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"double", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+;.
>From 1d3993473d4c23f7706ef0df80f17f7d88511ce2 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 15 Jan 2025 09:42:19 +0000
Subject: [PATCH 09/14] add todo about relaxing alignment requirement
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 6c63d4f65968d8..f8c47779e5a9ac 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -402,6 +402,7 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
GV->setUnnamedAddr(
GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+ // TODO: Consider relaxing alignment requirement.
GV->setAlignment(Align(16));
Value *PatternPtr = GV;
Value *NumBytes = Builder.CreateMul(
>From ae7d8261362f467c52c9e8b17ffb6f50b6fceddf Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 22 Jan 2025 06:13:12 +0000
Subject: [PATCH 10/14] Avoid truncating ArraySize to unsigned
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index f8c47779e5a9ac..816f630a0212cf 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -282,7 +282,7 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
return C;
// Otherwise, we'll use an array of the constants.
- unsigned ArraySize = 16 / Size;
+ uint64_t ArraySize = 16 / Size;
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
>From b9f9d9828de0851268a2c444ed9aa99a9d818aa4 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Tue, 28 Jan 2025 10:13:50 +0000
Subject: [PATCH 11/14] Add test case including alignment specified at callsite
---
.../X86/memset-pattern.ll | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 95e7ca053ec222..72144fcdc97eef 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -8,6 +8,7 @@
; CHECK: @.memset_pattern.3 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
; CHECK: @.memset_pattern.5 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.6 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
;.
define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
@@ -55,10 +56,20 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
ret void
}
+define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i128_1_align_attr(
+; CHECK-SAME: ptr align 16 [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 16)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.experimental.memset.pattern(ptr align(16) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
+ ret void
+}
+
define void @memset_pattern_i128_16(ptr %a) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 256)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 256)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
@@ -69,7 +80,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.6, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
>From 2c55ea5296ba341cbe97d193c32564b719358e04 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Tue, 28 Jan 2025 10:39:29 +0000
Subject: [PATCH 12/14] Preserve call site attributes on the destination pointer
argument
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 9 +++++++--
.../PreISelIntrinsicLowering/X86/memset-pattern.ll | 2 +-
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index aac07f31e97282..a8693f1bcdb146 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -410,11 +410,16 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
Builder.getInt64(
DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8),
Memset->getLength());
- // TODO: Consider attempting to preserve call site attributes. See
- // e.g. mergeAttributesAndFlags in SimplifyLibCalls.
CallInst *MemsetPattern16Call = Builder.CreateCall(
MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
+ // Preserve any call site attributes on the destination pointer
+ // argument (e.g. alignment).
+ AttrBuilder ArgAttrs(Memset->getContext(),
+ Memset->getAttributes().getParamAttrs(0));
+ MemsetPattern16Call->setAttributes(
+ MemsetPattern16Call->getAttributes().addParamAttributes(
+ Memset->getContext(), 0, ArgAttrs));
} else {
expandMemSetPatternAsLoop(Memset);
}
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 72144fcdc97eef..7cfdcb8578809e 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -59,7 +59,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_align_attr(
; CHECK-SAME: ptr align 16 [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 16)
+; CHECK-NEXT: call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.4, i64 16)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr align(16) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
>From b9d366aca33859220d41accec2b3fbe2f09e6f43 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 29 Jan 2025 15:35:25 +0000
Subject: [PATCH 13/14] Replace FIXME with TODO
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index a8693f1bcdb146..2a73e83dfa44c3 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -239,7 +239,7 @@ static bool canEmitLibcall(const TargetMachine *TM, Function *F,
// LoopIdiomRecognize).
static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
const TargetLibraryInfo &TLI) {
- // FIXME: This could check for UndefValue because it can be merged into any
+ // TODO: This could check for UndefValue because it can be merged into any
// other valid pattern.
// Don't emit libcalls if a non-default address space is being used.
>From f960ef300a72ac87d70b9a26cc98a3e59bb4523f Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Thu, 30 Jan 2025 07:07:32 +0000
Subject: [PATCH 14/14] Address review comment - adopt early break
---
llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 89 ++++++++++---------
1 file changed, 47 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 2a73e83dfa44c3..27fa0b43d74f66 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -379,50 +379,55 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
case Intrinsic::experimental_memset_pattern: {
auto *Memset = cast<MemSetPatternInst>(Inst);
const TargetLibraryInfo &TLI = LookupTLI(*Memset->getFunction());
- if (Constant *PatternValue = getMemSetPattern16Value(Memset, TLI)) {
- // FIXME: There is currently no profitability calculation for emitting
- // the libcall vs expanding the memset.pattern directly.
- IRBuilder<> Builder(Inst);
- Module *M = Memset->getModule();
- const DataLayout &DL = Memset->getDataLayout();
-
- StringRef FuncName = "memset_pattern16";
- FunctionCallee MSP = getOrInsertLibFunc(
- M, TLI, LibFunc_memset_pattern16, Builder.getVoidTy(),
- Memset->getRawDest()->getType(), Builder.getPtrTy(),
- Memset->getLength()->getType());
- inferNonMandatoryLibFuncAttrs(M, FuncName, TLI);
-
- // Otherwise we should form a memset_pattern16. PatternValue is known
- // to be an constant array of 16-bytes. Put the value into a mergable
- // global.
- assert(Memset->getRawDest()->getType()->getPointerAddressSpace() == 0 &&
- "Should have skipped if non-zero AS");
- GlobalVariable *GV = new GlobalVariable(
- *M, PatternValue->getType(), /*isConstant=*/true,
- GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
- GV->setUnnamedAddr(
- GlobalValue::UnnamedAddr::Global); // Ok to merge these.
- // TODO: Consider relaxing alignment requirement.
- GV->setAlignment(Align(16));
- Value *PatternPtr = GV;
- Value *NumBytes = Builder.CreateMul(
- Builder.getInt64(
- DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8),
- Memset->getLength());
- CallInst *MemsetPattern16Call = Builder.CreateCall(
- MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
- MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
- // Preserve any call site attributes on the destination pointer
- // argument (e.g. alignment).
- AttrBuilder ArgAttrs(Memset->getContext(),
- Memset->getAttributes().getParamAttrs(0));
- MemsetPattern16Call->setAttributes(
- MemsetPattern16Call->getAttributes().addParamAttributes(
- Memset->getContext(), 0, ArgAttrs));
- } else {
+ Constant *PatternValue = getMemSetPattern16Value(Memset, TLI);
+ if (!PatternValue) {
+ // If it isn't possible to emit a memset_pattern16 libcall, expand to
+ // a loop instead.
expandMemSetPatternAsLoop(Memset);
+ Changed = true;
+ Memset->eraseFromParent();
+ break;
}
+ // FIXME: There is currently no profitability calculation for emitting
+ // the libcall vs expanding the memset.pattern directly.
+ IRBuilder<> Builder(Inst);
+ Module *M = Memset->getModule();
+ const DataLayout &DL = Memset->getDataLayout();
+
+ StringRef FuncName = "memset_pattern16";
+ FunctionCallee MSP = getOrInsertLibFunc(
+ M, TLI, LibFunc_memset_pattern16, Builder.getVoidTy(),
+ Memset->getRawDest()->getType(), Builder.getPtrTy(),
+ Memset->getLength()->getType());
+ inferNonMandatoryLibFuncAttrs(M, FuncName, TLI);
+
+ // Otherwise we should form a memset_pattern16. PatternValue is known
+ // to be an constant array of 16-bytes. Put the value into a mergable
+ // global.
+ assert(Memset->getRawDest()->getType()->getPointerAddressSpace() == 0 &&
+ "Should have skipped if non-zero AS");
+ GlobalVariable *GV = new GlobalVariable(
+ *M, PatternValue->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, PatternValue, ".memset_pattern");
+ GV->setUnnamedAddr(
+ GlobalValue::UnnamedAddr::Global); // Ok to merge these.
+ // TODO: Consider relaxing alignment requirement.
+ GV->setAlignment(Align(16));
+ Value *PatternPtr = GV;
+ Value *NumBytes = Builder.CreateMul(
+ Builder.getInt64(DL.getTypeSizeInBits(Memset->getValue()->getType()) /
+ 8),
+ Memset->getLength());
+ CallInst *MemsetPattern16Call =
+ Builder.CreateCall(MSP, {Memset->getRawDest(), PatternPtr, NumBytes});
+ MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata());
+ // Preserve any call site attributes on the destination pointer
+ // argument (e.g. alignment).
+ AttrBuilder ArgAttrs(Memset->getContext(),
+ Memset->getAttributes().getParamAttrs(0));
+ MemsetPattern16Call->setAttributes(
+ MemsetPattern16Call->getAttributes().addParamAttributes(
+ Memset->getContext(), 0, ArgAttrs));
Changed = true;
Memset->eraseFromParent();
break;
More information about the llvm-commits
mailing list