[llvm] c7c6c0a - [AggressiveInstCombine] Fix memory location for alias analysis (#169953)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 1 06:46:19 PST 2025
Author: Yingwei Zheng
Date: 2025-12-01T22:46:16+08:00
New Revision: c7c6c0a45c1d840d05b414d73f7bab5136dcb8c2
URL: https://github.com/llvm/llvm-project/commit/c7c6c0a45c1d840d05b414d73f7bab5136dcb8c2
DIFF: https://github.com/llvm/llvm-project/commit/c7c6c0a45c1d840d05b414d73f7bab5136dcb8c2.diff
LOG: [AggressiveInstCombine] Fix memory location for alias analysis (#169953)
When LOps.RootInsert comes after LI2, since we use LI2 as the new insert
point, we should make sure the memory region accessed by LOps isn't
modified. However, the original implementation passes the bit width
`LOps.LoadSize` as the number of bytes to be accessed, causing BasicAA
to return NoAlias:
https://github.com/llvm/llvm-project/blob/a941e150749650e6a75e948f10d46b0bedcc128b/llvm/lib/Analysis/BasicAliasAnalysis.cpp#L1658-L1667
With `-aa-trace`, we get:
```
End ptr getelementptr inbounds nuw (i8, ptr @g, i64 4) @ LocationSize::precise(1), %gep1 = getelementptr i8, ptr %p, i64 4 @ LocationSize::precise(32) = NoAlias
```
This patch uses `getTypeStoreSize` to compute the correct access size
for LOps. Instead of merely resizing the MemoryLocation obtained from
End (i.e., `LOps.RootInsert`), it constructs a new MemoryLocation from
the root load's pointer operand, the store-size-based LocationSize, and
the combined AATags, for correctness.
Closes https://github.com/llvm/llvm-project/issues/169921.
Added:
Modified:
llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7ed8fb68f107e..2397133fa61ef 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -710,9 +710,17 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
MemoryLocation Loc;
if (!Start->comesBefore(End)) {
std::swap(Start, End);
- Loc = MemoryLocation::get(End);
+ // If LOps.RootInsert comes after LI2, since we use LI2 as the new insert
+ // point, we should make sure whether the memory region accessed by LOps
+ // isn't modified.
if (LOps.FoundRoot)
- Loc = Loc.getWithNewSize(LOps.LoadSize);
+ Loc = MemoryLocation(
+ LOps.Root->getPointerOperand(),
+ LocationSize::precise(DL.getTypeStoreSize(
+ IntegerType::get(LI1->getContext(), LOps.LoadSize))),
+ LOps.AATags);
+ else
+ Loc = MemoryLocation::get(End);
} else
Loc = MemoryLocation::get(End);
unsigned NumScanned = 0;
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 46ec9e0a50842..f62a1ca15729b 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -2505,3 +2505,56 @@ entry:
%or = or disjoint i32 %shl, %conv.2
ret i32 %or
}
+
+ at g = global i64 1060856922120
+
+; Make sure we use the correct memory location for alias analysis.
+define i64 @loadcombine_consecutive_mayalias(ptr %p) {
+; LE-LABEL: @loadcombine_consecutive_mayalias(
+; LE-NEXT: entry:
+; LE-NEXT: [[LOAD3:%.*]] = load i32, ptr [[P:%.*]], align 4
+; LE-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; LE-NEXT: store i8 0, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 4
+; LE-NEXT: [[LOAD2:%.*]] = load i32, ptr [[GEP1]], align 4
+; LE-NEXT: [[TMP0:%.*]] = zext i32 [[LOAD2]] to i64
+; LE-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 32
+; LE-NEXT: [[ZEXT3:%.*]] = zext i32 [[LOAD3]] to i64
+; LE-NEXT: [[LOAD1:%.*]] = or i64 [[TMP1]], [[ZEXT3]]
+; LE-NEXT: [[RES:%.*]] = lshr i64 [[LOAD1]], 32
+; LE-NEXT: ret i64 [[RES]]
+;
+; BE-LABEL: @loadcombine_consecutive_mayalias(
+; BE-NEXT: entry:
+; BE-NEXT: [[LOAD1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; BE-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 4
+; BE-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[P]], i64 5
+; BE-NEXT: store i8 0, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 4
+; BE-NEXT: [[LOAD2:%.*]] = load i8, ptr [[GEP1]], align 4
+; BE-NEXT: [[LOAD3:%.*]] = load i24, ptr [[GEP2]], align 1
+; BE-NEXT: [[ZEXT1:%.*]] = zext i24 [[LOAD3]] to i64
+; BE-NEXT: [[SHL1:%.*]] = shl i64 [[ZEXT1]], 40
+; BE-NEXT: [[ZEXT2:%.*]] = zext i8 [[LOAD2]] to i64
+; BE-NEXT: [[SHL2:%.*]] = shl i64 [[ZEXT2]], 32
+; BE-NEXT: [[OR1:%.*]] = or i64 [[SHL1]], [[SHL2]]
+; BE-NEXT: [[ZEXT3:%.*]] = zext i32 [[LOAD1]] to i64
+; BE-NEXT: [[OR2:%.*]] = or i64 [[OR1]], [[ZEXT3]]
+; BE-NEXT: [[RES:%.*]] = lshr i64 [[OR2]], 32
+; BE-NEXT: ret i64 [[RES]]
+;
+entry:
+ %load1 = load i32, ptr %p, align 4
+ %gep1 = getelementptr i8, ptr %p, i64 4
+ %gep2 = getelementptr i8, ptr %p, i64 5
+ store i8 0, ptr getelementptr inbounds nuw (i8, ptr @g, i64 4), align 4
+ %load2 = load i8, ptr %gep1, align 4
+ %load3 = load i24, ptr %gep2, align 1
+ %zext1 = zext i24 %load3 to i64
+ %shl1 = shl i64 %zext1, 40
+ %zext2 = zext i8 %load2 to i64
+ %shl2 = shl i64 %zext2, 32
+ %or1 = or i64 %shl1, %shl2
+ %zext3 = zext i32 %load1 to i64
+ %or2 = or i64 %or1, %zext3
+ %res = lshr i64 %or2, 32
+ ret i64 %res
+}
More information about the llvm-commits
mailing list