[llvm] [InstCombine] Support offsets in `memset` to load forwarding (PR #151924)

Pedro Lobo via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 4 10:04:48 PDT 2025


https://github.com/pedroclobo updated https://github.com/llvm/llvm-project/pull/151924

>From 5f56ddb4ebd5a67b42d72ba2c9c119e581c9e6f4 Mon Sep 17 00:00:00 2001
From: Pedro Lobo <pedro.lobo at tecnico.ulisboa.pt>
Date: Sun, 3 Aug 2025 20:38:38 +0100
Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests

---
 .../InstCombine/load-store-forward.ll         | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll
index 9a5db318df5e7..b120abbf67d46 100644
--- a/llvm/test/Transforms/InstCombine/load-store-forward.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll
@@ -379,6 +379,45 @@ define i32 @load_after_memset_0_offset(ptr %a) {
   ret i32 %v
 }
 
+define i32 @load_after_memset_1_offset(ptr %a) {
+; CHECK-LABEL: @load_after_memset_1_offset(
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
+  %gep = getelementptr i8, ptr %a, i64 4
+  %v = load i32, ptr %gep
+  ret i32 %v
+}
+
+define i1 @load_after_memset_0_offset_i1(ptr %a) {
+; CHECK-LABEL: @load_after_memset_0_offset_i1(
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 12
+; CHECK-NEXT:    [[V:%.*]] = load i1, ptr [[GEP]], align 1
+; CHECK-NEXT:    ret i1 [[V]]
+;
+  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
+  %gep = getelementptr i1, ptr %a, i64 12
+  %v = load i1, ptr %gep
+  ret i1 %v
+}
+
+define i8 @neg_load_after_memset_0_neg_offset(ptr %a) {
+; CHECK-LABEL: @neg_load_after_memset_0_neg_offset(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 2
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[GEP]], i8 0, i64 16, i1 false)
+; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[A]], align 1
+; CHECK-NEXT:    ret i8 [[V]]
+;
+  %gep = getelementptr i8, ptr %a, i64 2
+  call void @llvm.memset.p0.i64(ptr %gep, i8 0, i64 16, i1 false)
+  %v = load i8, ptr %a
+  ret i8 %v
+}
+
 define i32 @load_after_memset_0_offset_too_large(ptr %a) {
 ; CHECK-LABEL: @load_after_memset_0_offset_too_large(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)

>From 0eed6ec26fd555576a304e56161a115f017ec8dd Mon Sep 17 00:00:00 2001
From: Pedro Lobo <pedro.lobo at tecnico.ulisboa.pt>
Date: Sun, 3 Aug 2025 20:39:35 +0100
Subject: [PATCH 2/2] [InstCombine] Support offsets in `memset` to load
 forwarding

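Adds support for forwarding the value stored by a `memset` to a load that
reads at a constant, non-negative offset from the `memset` destination,
provided the loaded bytes lie entirely within the `memset` range.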
---
 llvm/lib/Analysis/Loads.cpp                         | 12 ++++++++----
 llvm/test/Analysis/GlobalsModRef/memset-escape.ll   | 13 ++-----------
 .../Transforms/InstCombine/load-store-forward.ll    | 13 +++----------
 3 files changed, 13 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index da76f5b3d498a..78d0887d5d87e 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -631,9 +631,13 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
     if (!Val || !Len)
       return nullptr;
 
-    // TODO: Handle offsets.
-    Value *Dst = MSI->getDest();
-    if (!AreEquivalentAddressValues(Dst, Ptr))
+    // Decompose both pointers into a common base plus constant byte offsets.
+    int64_t StoreOffset = 0, LoadOffset = 0;
+    const Value *StoreBase =
+        GetPointerBaseWithConstantOffset(MSI->getDest(), StoreOffset, DL);
+    const Value *LoadBase =
+        GetPointerBaseWithConstantOffset(Ptr, LoadOffset, DL);
+    if (StoreBase != LoadBase || LoadOffset < StoreOffset)
       return nullptr;
 
     if (IsLoadCSE)
@@ -645,7 +649,7 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
 
     // Make sure the read bytes are contained in the memset.
     uint64_t LoadSize = LoadTypeSize.getFixedValue();
-    if ((Len->getValue() * 8).ult(LoadSize))
+    if ((Len->getValue() * 8).ult(LoadSize + (LoadOffset - StoreOffset) * 8))
       return nullptr;
 
     APInt Splat = LoadSize >= 8 ? APInt::getSplat(LoadSize, Val->getValue())
diff --git a/llvm/test/Analysis/GlobalsModRef/memset-escape.ll b/llvm/test/Analysis/GlobalsModRef/memset-escape.ll
index 77652a6278aae..0bdc4152de1c9 100644
--- a/llvm/test/Analysis/GlobalsModRef/memset-escape.ll
+++ b/llvm/test/Analysis/GlobalsModRef/memset-escape.ll
@@ -7,23 +7,14 @@ target triple = "x86_64-apple-macosx10.10.0"
 @a = internal global [3 x i32] zeroinitializer, align 4
 @b = common global i32 0, align 4
 
-; The important thing we're checking for here is the reload of (some element of)
-; @a after the memset.
+; The important thing we're checking here is that the value from the memset
+; rather than the preceding store is forwarded.
 
 define i32 @main() {
 ; CHECK-LABEL: define noundef i32 @main(
 ; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    store i32 1, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 4
-; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(12) @a, i8 0, i64 12, i1 false)
 ; CHECK-NEXT:    store i32 3, ptr @b, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr @a, i64 8), align 4
-; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[CMP1_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
-; CHECK:       [[IF_THEN]]:
-; CHECK-NEXT:    tail call void @abort()
-; CHECK-NEXT:    unreachable
-; CHECK:       [[IF_END]]:
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll
index b120abbf67d46..0f03f16062e52 100644
--- a/llvm/test/Transforms/InstCombine/load-store-forward.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll
@@ -365,13 +365,10 @@ define i32 @load_after_memset_unknown(ptr %a, i8 %byte) {
   ret i32 %v
 }
 
-; TODO: Handle load at offset.
 define i32 @load_after_memset_0_offset(ptr %a) {
 ; CHECK-LABEL: @load_after_memset_0_offset(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
-; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    ret i32 [[V]]
+; CHECK-NEXT:    ret i32 0
 ;
   call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
   %gep = getelementptr i8, ptr %a, i64 4
@@ -382,9 +379,7 @@ define i32 @load_after_memset_0_offset(ptr %a) {
 define i32 @load_after_memset_1_offset(ptr %a) {
 ; CHECK-LABEL: @load_after_memset_1_offset(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
-; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
-; CHECK-NEXT:    ret i32 [[V]]
+; CHECK-NEXT:    ret i32 16843009
 ;
   call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
   %gep = getelementptr i8, ptr %a, i64 4
@@ -395,9 +390,7 @@ define i32 @load_after_memset_1_offset(ptr %a) {
 define i1 @load_after_memset_0_offset_i1(ptr %a) {
 ; CHECK-LABEL: @load_after_memset_0_offset_i1(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 12
-; CHECK-NEXT:    [[V:%.*]] = load i1, ptr [[GEP]], align 1
-; CHECK-NEXT:    ret i1 [[V]]
+; CHECK-NEXT:    ret i1 false
 ;
   call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
   %gep = getelementptr i1, ptr %a, i64 12



More information about the llvm-commits mailing list