[llvm] [InstCombine] Fold align assume into load's !align metadata if possible. (PR #108958)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 08:39:33 PDT 2024
dtcxzyw wrote:
> Based on https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1320/files it looks like we end up losing the alignment information in ~most cases? Presumably because SROA later comes along and removes the load.
>
> Note that for the existing non-null handling, SROA will actually rematerialize the nonnull assumption. But I'm reasonably confident that doing that for `!align` would have terrible effects, at least for frontends that use `!align` a lot.
PhaseOrdering reproducer:
```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #0
define i32 @_ZN4llvm7support6endian8readNextIjLNS_10endiannessE1ELm0EhEET_RPKT2_(ptr %0) {
%2 = call i32 @_ZN4llvm7support6endian8readNextIjLm0EhEET_RPKT1_NS_10endiannessE(ptr %0)
ret i32 0
}
define i32 @_ZN4llvm7support6endian8readNextIjLm0EhEET_RPKT1_NS_10endiannessE(ptr %0) {
%2 = load ptr, ptr %0, align 8
%3 = call i32 @_ZN4llvm7support6endian4readIjLm0EEET_PKvNS_10endiannessE(ptr %2)
%4 = load ptr, ptr %0, align 8
%5 = getelementptr i8, ptr %4, i64 4
store ptr %5, ptr %0, align 8
ret i32 0
}
define i32 @_ZN4llvm7support6endian4readIjLm0EEET_PKvNS_10endiannessE(ptr %0) {
call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
%.0.copyload = load i32, ptr %0, align 1
%2 = call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload)
ret i32 0
}
declare i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32)
define i64 @_ZN4llvm22OnDiskChainedHashTableIN12_GLOBAL__N_126IdentifierIndexReaderTraitEE24readNumBucketsAndEntriesERPKh(ptr %0) {
%2 = call i32 @_ZN4llvm7support6endian8readNextIjLNS_10endiannessE1ELm0EhEET_RPKT2_(ptr %0)
%3 = call i32 @_ZN4llvm7support6endian8readNextIjLNS_10endiannessE1ELm0EhEET_RPKT2_(ptr %0)
ret i64 0
}
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
```
Before:
```
define noundef i64 @_ZN4llvm22OnDiskChainedHashTableIN12_GLOBAL__N_126IdentifierIndexReaderTraitEE24readNumBucketsAndEntriesERPKh(ptr nocapture %0) local_unnamed_addr {
%2 = load ptr, ptr %0, align 8
call void @llvm.assume(i1 true) [ "align"(ptr %2, i64 4) ]
%.0.copyload.i.i.i = load i32, ptr %2, align 4
%3 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i)
%4 = load ptr, ptr %0, align 8
%5 = getelementptr i8, ptr %4, i64 4
store ptr %5, ptr %0, align 8
call void @llvm.assume(i1 true) [ "align"(ptr %5, i64 4) ]
%.0.copyload.i.i.i1 = load i32, ptr %5, align 4
%6 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i1)
%7 = load ptr, ptr %0, align 8
%8 = getelementptr i8, ptr %7, i64 4
store ptr %8, ptr %0, align 8
ret i64 0
}
```
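After (the first pointer load gains `!align`, but the `align` assumption on `%5` is gone and the second `copyload` drops from `align 4` to `align 1`):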
```
define noundef i64 @_ZN4llvm22OnDiskChainedHashTableIN12_GLOBAL__N_126IdentifierIndexReaderTraitEE24readNumBucketsAndEntriesERPKh(ptr nocapture %0) local_unnamed_addr {
%2 = load ptr, ptr %0, align 8, !align !0
%.0.copyload.i.i.i = load i32, ptr %2, align 4
%3 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i)
%4 = load ptr, ptr %0, align 8
%5 = getelementptr i8, ptr %4, i64 4
store ptr %5, ptr %0, align 8
%.0.copyload.i.i.i1 = load i32, ptr %5, align 1
%6 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i1)
%7 = load ptr, ptr %0, align 8
%8 = getelementptr i8, ptr %7, i64 4
store ptr %8, ptr %0, align 8
ret i64 0
}
```
https://github.com/llvm/llvm-project/pull/108958
More information about the llvm-commits mailing list