[llvm] [PreISelIntrinsicLowering] Support producing memset_pattern16 when loading from constant global (PR #129220)

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 28 02:15:59 PST 2025


https://github.com/asb created https://github.com/llvm/llvm-project/pull/129220

This is motivated by #126736, and catches a case that would have
resulted in memset_pattern16 being produced by LoopIdiomRecognize
previously but is missed after moving to the intrinsic in #126736 and
relying on PreISelIntrinsicLowering to produce the libcall when
available.

The logic for handling load instructions that access constant globals
could be made more extensive, but it's not clear it would be worthwhile.
For now we prioritise the patterns that could be produced by
LoopIdiomRecognize.

From a31332222903b8208b896c0ca2c58347bff19e29 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Fri, 28 Feb 2025 10:14:03 +0000
Subject: [PATCH 1/2] [PreISelIntrinsicLowering][test] Precommit test for
 failure to select memset_pattern16 for loaded constant val

---
 .../X86/memset-pattern.ll                     | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 7cfdcb8578809..839603e17bcb9 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -2,6 +2,8 @@
 ; RUN: opt -mtriple=x86_64-apple-darwin10.0.0 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
 
 ;.
+; CHECK: @G = global i32 5
+; CHECK: @ptr_pat = private unnamed_addr constant ptr @G, align 8
 ; CHECK: @.memset_pattern = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
 ; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
 ; CHECK: @.memset_pattern.2 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
@@ -144,6 +146,78 @@ define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
 !7 = !{!"omnipotent char", !8, i64 0}
 !8 = !{!"Simple C++ TBAA"}
 
+@G = global i32 5
+@ptr_pat = private unnamed_addr constant ptr @G, align 8
+
+; FIXME: memset_pattern16 should be selected.
+define void @memset_pattern_i64_16_fromptr(ptr %a) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_16_fromptr(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
+; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK:       [[LOADSTORELOOP]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; CHECK-NEXT:    store i64 [[TMP1]], ptr [[TMP3]], align 1
+; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 16
+; CHECK-NEXT:    br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    ret void
+;
+  %1 = load i64, ptr @ptr_pat, align 8
+  tail call void @llvm.experimental.memset.pattern(ptr %a, i64 %1, i64 16, i1 false)
+  ret void
+}
+
+; FIXME: memset_pattern16 should be selected.
+define void @memset_pattern_i64_x_fromptr(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_x_fromptr(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 0, [[X]]
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK:       [[LOADSTORELOOP]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    store i64 [[TMP1]], ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP5]] = add i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    ret void
+;
+  %1 = load i64, ptr @ptr_pat, align 8
+  tail call void @llvm.experimental.memset.pattern(ptr %a, i64 %1, i64 %x, i1 false)
+  ret void
+}
+
+@nonconst_ptr_pat = private unnamed_addr global ptr @G, align 8
+
+; memset_pattern16 shouldn't be used for this example (at least not by just
+; creating a constantarray global at compile time), as the global isn't
+; constant.
+define void @memset_pattern_i64_x_fromnonconstptr(ptr %a, i64 %x) nounwind {
+; CHECK-LABEL: define void @memset_pattern_i64_x_fromnonconstptr(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @nonconst_ptr_pat, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 0, [[X]]
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
+; CHECK:       [[LOADSTORELOOP]]:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOADSTORELOOP]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
+; CHECK-NEXT:    store i64 [[TMP1]], ptr [[TMP4]], align 1
+; CHECK-NEXT:    [[TMP5]] = add i64 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
+; CHECK-NEXT:    br i1 [[TMP6]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
+; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    ret void
+;
+  %1 = load i64, ptr @nonconst_ptr_pat, align 8
+  tail call void @llvm.experimental.memset.pattern(ptr %a, i64 %1, i64 %x, i1 false)
+  ret void
+}
+
 ;.
 ; CHECK: attributes #[[ATTR0]] = { nounwind }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }

From 7dea1458faeafcd64dd0aca385aaeb365673f8df Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Fri, 28 Feb 2025 10:14:04 +0000
Subject: [PATCH 2/2] [PreISelIntrinsicLowering] Support producing
 memset_pattern16 when loading from constant global

This is motivated by #126736, and catches a case that would have
resulted in memset_pattern16 being produced by LoopIdiomRecognize
previously but is missed after moving to the intrinsic in #126736 and
relying on PreISelIntrinsicLowering to produce the libcall when
available.

The logic for handling load instructions that access constant globals
could be made more extensive, but it's not clear it would be worthwhile.
For now we prioritise the patterns that could be produced by
LoopIdiomRecognize.
---
 llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp | 17 +++++++-
 .../X86/memset-pattern.ll                     | 43 ++++++-------------
 2 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 27fa0b43d74f6..458e52bcb7c68 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -254,10 +254,23 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
   if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16))
     return nullptr;
 
+  // If V is a load instruction that loads from a constant global then attempt
+  // to use that constant to produce the pattern.
+  Constant *C = nullptr;
+  if (auto *LI = dyn_cast<LoadInst>(V)) {
+    if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand())) {
+      if (GV->isConstant() && GV->hasInitializer()) {
+        C = GV->getInitializer();
+      }
+    }
+  }
+
+  if (!C)
+    C = dyn_cast<Constant>(V);
+
   // If the value isn't a constant, we can't promote it to being in a constant
   // array.  We could theoretically do a store to an alloca or something, but
   // that doesn't seem worthwhile.
-  Constant *C = dyn_cast<Constant>(V);
   if (!C || isa<ConstantExpr>(C))
     return nullptr;
 
@@ -284,7 +297,7 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
 
   // Otherwise, we'll use an array of the constants.
   uint64_t ArraySize = 16 / Size;
-  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
+  ArrayType *AT = ArrayType::get(C->getType(), ArraySize);
   return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
 }
 
diff --git a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
index 839603e17bcb9..e44007f736370 100644
--- a/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
+++ b/llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll
@@ -4,13 +4,16 @@
 ;.
 ; CHECK: @G = global i32 5
 ; CHECK: @ptr_pat = private unnamed_addr constant ptr @G, align 8
+; CHECK: @nonconst_ptr_pat = private unnamed_addr global ptr @G, align 8
 ; CHECK: @.memset_pattern = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
 ; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
-; CHECK: @.memset_pattern.2 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
-; CHECK: @.memset_pattern.3 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
-; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
+; CHECK: @.memset_pattern.3 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
+; CHECK: @.memset_pattern.4 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
 ; CHECK: @.memset_pattern.5 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
 ; CHECK: @.memset_pattern.6 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.7 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
+; CHECK: @.memset_pattern.8 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
 ;.
 define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
@@ -33,7 +36,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
 define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1(
 ; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 16)
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 16)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -61,7 +64,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
 define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_1_align_attr(
 ; CHECK-SAME: ptr align 16 [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.4, i64 16)
+; CHECK-NEXT:    call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.6, i64 16)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr align(16) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -71,7 +74,7 @@ define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nou
 define void @memset_pattern_i128_16(ptr %a) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_16(
 ; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 256)
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.7, i64 256)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
@@ -82,7 +85,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i128_x(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 16, [[X]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.6, i64 [[TMP1]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.8, i64 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
@@ -112,7 +115,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i16_x(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 2, [[X]]
-; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
@@ -149,20 +152,11 @@ define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
 @G = global i32 5
 @ptr_pat = private unnamed_addr constant ptr @G, align 8
 
-; FIXME: memset_pattern16 should be selected.
 define void @memset_pattern_i64_16_fromptr(ptr %a) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i64_16_fromptr(
 ; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
-; CHECK-NEXT:    br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
-; CHECK:       [[LOADSTORELOOP]]:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
-; CHECK-NEXT:    store i64 [[TMP1]], ptr [[TMP3]], align 1
-; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 16
-; CHECK-NEXT:    br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
-; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 128)
 ; CHECK-NEXT:    ret void
 ;
   %1 = load i64, ptr @ptr_pat, align 8
@@ -170,21 +164,12 @@ define void @memset_pattern_i64_16_fromptr(ptr %a) nounwind {
   ret void
 }
 
-; FIXME: memset_pattern16 should be selected.
 define void @memset_pattern_i64_x_fromptr(ptr %a, i64 %x) nounwind {
 ; CHECK-LABEL: define void @memset_pattern_i64_x_fromptr(
 ; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 0, [[X]]
-; CHECK-NEXT:    br i1 [[TMP2]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
-; CHECK:       [[LOADSTORELOOP]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOADSTORELOOP]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT:    store i64 [[TMP1]], ptr [[TMP4]], align 1
-; CHECK-NEXT:    [[TMP5]] = add i64 [[TMP3]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
-; CHECK-NEXT:    br i1 [[TMP6]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
-; CHECK:       [[SPLIT]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 8, [[X]]
+; CHECK-NEXT:    call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 [[TMP2]])
 ; CHECK-NEXT:    ret void
 ;
   %1 = load i64, ptr @ptr_pat, align 8



More information about the llvm-commits mailing list