[llvm] [PreISelIntrinsicLowering] Support producing memset_pattern16 when loading from constant global (PR #129220)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 02:16:34 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Alex Bradbury (asb)
<details>
<summary>Changes</summary>
This is motivated by #<!-- -->126736, and catches a case that would have
resulted in memset_pattern16 being produced by LoopIdiomRecognize
previously but is missed after moving to the intrinsic in #<!-- -->126736 and
relying on PreISelIntrinsicLowering to produce the libcall when
available.
The logic for handling load instructions that access constant globals
could be made more extensive, but it's not clear it would be worthwhile.
For now we prioritise the patterns that could be produced by
LoopIdiomRecognize.
---
Full diff: https://github.com/llvm/llvm-project/pull/129220.diff
2 Files Affected:
- (modified) llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp (+15-2)
- (modified) llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll (+67-8)
``````````diff
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 27fa0b43d74f6..458e52bcb7c68 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -254,10 +254,23 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16))
return nullptr;
+ // If V is a load instruction that loads from a constant global then attempt
+ // to use that constant to produce the pattern.
+ Constant *C = nullptr;
+ if (auto *LI = dyn_cast<LoadInst>(V)) {
+ if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand())) {
+ if (GV->isConstant() && GV->hasInitializer()) {
+ C = GV->getInitializer();
+ }
+ }
+ }
+
+ if (!C)
+ C = dyn_cast<Constant>(V);
+
// If the value isn't a constant, we can't promote it to being in a constant
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
- Constant *C = dyn_cast<Constant>(V);
if (!C || isa<ConstantExpr>(C))
return nullptr;
@@ -284,7 +297,7 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
// Otherwise, we'll use an array of the constants.
uint64_t ArraySize = 16 / Size;
- ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+ ArrayType *AT = ArrayType::get(C->getType(), ArraySize);
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
}
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 7cfdcb8578809..e44007f736370 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -2,13 +2,18 @@
; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
;.
+; CHECK: @G = global i32 5
+; CHECK: @ptr_pat = private unnamed_addr constant ptr @G, align 8
+; CHECK: @nonconst_ptr_pat = private unnamed_addr global ptr @G, align 8
; CHECK: @.memset_pattern = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
-; CHECK: @.memset_pattern.3 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
+; CHECK: @.memset_pattern.3 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
+; CHECK: @.memset_pattern.4 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
; CHECK: @.memset_pattern.5 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
; CHECK: @.memset_pattern.6 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.7 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.8 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
;.
define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
@@ -31,7 +36,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 16)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 16)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -59,7 +64,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1_align_attr(
; CHECK-SAME: ptr align 16 [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.4, i64 16)
+; CHECK-NEXT: call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.6, i64 16)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr align(16) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -69,7 +74,7 @@ define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nou
define void @memset_pattern_i128_16(ptr %a) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 256)
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.7, i64 256)
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
@@ -80,7 +85,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.6, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.8, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
@@ -110,7 +115,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i16_x(
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
-; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
@@ -144,6 +149,60 @@ define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C++ TBAA"}
+@G = global i32 5
+@ptr_pat = private unnamed_addr constant ptr @G, align 8
+
+define void @memset_pattern_i64_16_fromptr(ptr %a) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_16_fromptr(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 128)
+; CHECK-NEXT: ret void
+;
+ %1 = load i64, ptr @ptr_pat, align 8
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i64 %1, i64 16, i1 false)
+ ret void
+}
+
+define void @memset_pattern_i64_x_fromptr(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_x_fromptr(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = mul i64 8, [[X]]
+; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 [[TMP2]])
+; CHECK-NEXT: ret void
+;
+ %1 = load i64, ptr @ptr_pat, align 8
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i64 %1, i64 %x, i1 false)
+ ret void
+}
+
+@nonconst_ptr_pat = private unnamed_addr global ptr @G, align 8
+
+; memset_pattern16 shouldn't be used for this example (at least not by just
+; creating a constantarray global at compile time), as the global isn't
+; constant.
+define void @memset_pattern_i64_x_fromnonconstptr(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_x_fromnonconstptr(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @nonconst_ptr_pat, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 0, [[X]]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK: [[LOADSTORELOOP]]:
+; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 1
+; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK: [[SPLIT]]:
+; CHECK-NEXT: ret void
+;
+ %1 = load i64, ptr @nonconst_ptr_pat, align 8
+ tail call void @llvm.experimental.memset.pattern(ptr %a, i64 %1, i64 %x, i1 false)
+ ret void
+}
+
;.
; CHECK: attributes #[[ATTR0]] = { nounwind }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
``````````
</details>
https://github.com/llvm/llvm-project/pull/129220
More information about the llvm-commits
mailing list