[llvm] [AggressiveInstCombine] Use AA during store merge (PR #149992)

Tue Jul 22 03:23:46 PDT 2025

https://github.com/nikic created https://github.com/llvm/llvm-project/pull/149992

This is a small extension of #147540, resolving one of the FIXMEs. Instead of bailing out on any instruction that may read/write memory, use AA to check whether it can alias the stored parts. Do this using a crude check based on the underlying object only.

This pattern occurs rarely in practice (https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2597 shows it only in llvm), but at the same time it also doesn't seem to add any compile-time cost (https://llvm-compile-time-tracker.com/compare.php?from=cb8b0cd2cfbe817253f2679df53dd7926a7e1894&to=c4ab835e1d9b040ba5882238fe3e56ff32d314a8&stat=instructions:u), so it's probably worth handling.

>From 261fff321d7256556a216fa3ea4f425db3ea4c10 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 22 Jul 2025 11:23:42 +0200
Subject: [PATCH] [AggressiveInstCombine] Use AA during store merge

This is a small extension of #147540, resolving one of the FIXMEs.
Instead of bailing out on any instruction that may read/write
memory, use AA to check whether it can alias the stored parts.
Do this using a crude check based on the underlying object only.
---
 .../AggressiveInstCombine.cpp                      | 10 ++++++++--
 .../AggressiveInstCombine/X86/store-merge.ll       | 14 ++------------
 2 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7fa6e6c5161cf..7af5ba4e0e103 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -965,6 +965,7 @@ static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
   if (DL.isBigEndian())
     return false;
 
+  BatchAAResults BatchAA(AA);
   SmallVector<PartStore, 8> Parts;
   bool MadeChange = false;
   for (Instruction &I : make_early_inc_range(BB)) {
@@ -980,8 +981,13 @@ static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
       continue;
     }
 
-    // FIXME: Use AA to make this more precise.
-    if (I.mayReadOrWriteMemory() || I.mayThrow()) {
+    if (Parts.empty())
+      continue;
+
+    if (I.mayThrow() ||
+        (I.mayReadOrWriteMemory() &&
+         isModOrRefSet(BatchAA.getModRefInfo(
+             &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))))) {
       MadeChange |= mergePartStores(Parts, DL, TTI);
       Parts.clear();
       continue;
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
index 4ab8d18eb69b5..56786d0f9def0 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll
@@ -359,13 +359,8 @@ define void @test_aliasing_store(i16 %x, ptr %p, ptr %p2) {
 define void @test_non_aliasing_store(i16 %x, ptr noalias %p, ptr noalias %p2) {
 ; CHECK-LABEL: define void @test_non_aliasing_store(
 ; CHECK-SAME: i16 [[X:%.*]], ptr noalias [[P:%.*]], ptr noalias [[P2:%.*]]) {
-; CHECK-NEXT:    [[X_0:%.*]] = trunc i16 [[X]] to i8
-; CHECK-NEXT:    store i8 [[X_0]], ptr [[P]], align 1
+; CHECK-NEXT:    store i16 [[X]], ptr [[P]], align 1
 ; CHECK-NEXT:    store i8 0, ptr [[P2]], align 1
-; CHECK-NEXT:    [[SHR_1:%.*]] = lshr i16 [[X]], 8
-; CHECK-NEXT:    [[X_1:%.*]] = trunc i16 [[SHR_1]] to i8
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
-; CHECK-NEXT:    store i8 [[X_1]], ptr [[GEP_1]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %x.0 = trunc i16 %x to i8
@@ -403,13 +398,8 @@ define i8 @test_aliasing_load(i16 %x, ptr %p, ptr %p2) {
 define i8 @test_non_aliasing_load(i16 %x, ptr noalias %p, ptr noalias %p2) {
 ; CHECK-LABEL: define i8 @test_non_aliasing_load(
 ; CHECK-SAME: i16 [[X:%.*]], ptr noalias [[P:%.*]], ptr noalias [[P2:%.*]]) {
-; CHECK-NEXT:    [[X_0:%.*]] = trunc i16 [[X]] to i8
-; CHECK-NEXT:    store i8 [[X_0]], ptr [[P]], align 1
+; CHECK-NEXT:    store i16 [[X]], ptr [[P]], align 1
 ; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[P2]], align 1
-; CHECK-NEXT:    [[SHR_1:%.*]] = lshr i16 [[X]], 8
-; CHECK-NEXT:    [[X_1:%.*]] = trunc i16 [[SHR_1]] to i8
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1
-; CHECK-NEXT:    store i8 [[X_1]], ptr [[GEP_1]], align 1
 ; CHECK-NEXT:    ret i8 [[V]]
 ;
   %x.0 = trunc i16 %x to i8