[llvm] [InstCombine] Fold align assume into load's !align metadata if possible. (PR #108958)

Yingwei Zheng via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 17 08:39:33 PDT 2024


dtcxzyw wrote:

> Based on https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1320/files it looks like we end up losing the alignment information in ~most cases? Presumably because SROA later comes along and removes the load.
> 
> Note that for the existing non-null handling, SROA will actually rematerialize the nonnull assumption. But I'm reasonably confident that doing that for `!align` would have terrible effects, at least for frontends that use `!align` a lot.

PhaseOrdering reproducer:
```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #0

; Reduced wrapper: forwards %0 unchanged to the second readNext overload and
; discards the call result; the function itself always returns 0.
define i32 @_ZN4llvm7support6endian8readNextIjLNS_10endiannessE1ELm0EhEET_RPKT2_(ptr %0) {
  %2 = call i32 @_ZN4llvm7support6endian8readNextIjLm0EhEET_RPKT1_NS_10endiannessE(ptr %0)
  ret i32 0
}

; Loads the pointer stored at %0, hands it to the `read` callee, then advances
; the stored pointer by 4 bytes and writes it back through %0.
define i32 @_ZN4llvm7support6endian8readNextIjLm0EhEET_RPKT1_NS_10endiannessE(ptr %0) {
  %2 = load ptr, ptr %0, align 8
  %3 = call i32 @_ZN4llvm7support6endian4readIjLm0EEET_PKvNS_10endiannessE(ptr %2)
  ; The pointer is reloaded from %0 here rather than reusing %2.
  %4 = load ptr, ptr %0, align 8
  %5 = getelementptr i8, ptr %4, i64 4
  store ptr %5, ptr %0, align 8
  ret i32 0
}

; Source of the alignment fact in this reproducer: the assume's "align" operand
; bundle states that %0 is 4-byte aligned, while the load itself is only
; annotated `align 1`.
define i32 @_ZN4llvm7support6endian4readIjLm0EEET_PKvNS_10endiannessE(ptr %0) {
  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
  %.0.copyload = load i32, ptr %0, align 1
  ; The loaded value is passed to the external byte_swap declaration; its result is unused.
  %2 = call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload)
  ret i32 0
}

declare i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32)

; Entry point of the reproducer: calls the readNext wrapper twice on the same
; %0, ignoring both results. The "Before"/"After" listings in this message show
; this function after optimization.
define i64 @_ZN4llvm22OnDiskChainedHashTableIN12_GLOBAL__N_126IdentifierIndexReaderTraitEE24readNumBucketsAndEntriesERPKh(ptr %0) {
  %2 = call i32 @_ZN4llvm7support6endian8readNextIjLNS_10endiannessE1ELm0EhEET_RPKT2_(ptr %0)
  %3 = call i32 @_ZN4llvm7support6endian8readNextIjLNS_10endiannessE1ELm0EhEET_RPKT2_(ptr %0)
  ret i64 0
}

attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
```
Before (both align assumes are kept, and both `i32` loads are `align 4`):
```
; Optimized output with the align assumes still present: each i32 load is
; preceded by an "align" assume on its pointer and is annotated `align 4`.
define noundef i64 @_ZN4llvm22OnDiskChainedHashTableIN12_GLOBAL__N_126IdentifierIndexReaderTraitEE24readNumBucketsAndEntriesERPKh(ptr nocapture %0) local_unnamed_addr {
  %2 = load ptr, ptr %0, align 8
  call void @llvm.assume(i1 true) [ "align"(ptr %2, i64 4) ]
  %.0.copyload.i.i.i = load i32, ptr %2, align 4
  %3 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i)
  %4 = load ptr, ptr %0, align 8
  %5 = getelementptr i8, ptr %4, i64 4
  store ptr %5, ptr %0, align 8
  ; Second read: the assume is on %5, which is a getelementptr result rather
  ; than a value produced directly by a load.
  call void @llvm.assume(i1 true) [ "align"(ptr %5, i64 4) ]
  %.0.copyload.i.i.i1 = load i32, ptr %5, align 4
  %6 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i1)
  %7 = load ptr, ptr %0, align 8
  %8 = getelementptr i8, ptr %7, i64 4
  store ptr %8, ptr %0, align 8
  ret i64 0
}
```
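After (the assumes are gone; only the first pointer load carries `!align`, and the second `i32` load is only `align 1`):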
```
; Optimized output with no llvm.assume calls left: the alignment fact appears
; only as !align metadata on the first pointer load.
define noundef i64 @_ZN4llvm22OnDiskChainedHashTableIN12_GLOBAL__N_126IdentifierIndexReaderTraitEE24readNumBucketsAndEntriesERPKh(ptr nocapture %0) local_unnamed_addr {
  ; The definition of !0 is not included in this listing.
  %2 = load ptr, ptr %0, align 8, !align !0
  %.0.copyload.i.i.i = load i32, ptr %2, align 4
  %3 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i)
  ; This reload of the pointer carries no !align metadata.
  %4 = load ptr, ptr %0, align 8
  %5 = getelementptr i8, ptr %4, i64 4
  store ptr %5, ptr %0, align 8
  ; Neither an assume nor !align covers %5, and this load is `align 1`
  ; (the corresponding load in the "Before" listing is `align 4`).
  %.0.copyload.i.i.i1 = load i32, ptr %5, align 1
  %6 = tail call i32 @_ZN4llvm7support6endian9byte_swapIjEET_S3_NS_10endiannessE(i32 %.0.copyload.i.i.i1)
  %7 = load ptr, ptr %0, align 8
  %8 = getelementptr i8, ptr %7, i64 4
  store ptr %8, ptr %0, align 8
  ret i64 0
}
```

https://github.com/llvm/llvm-project/pull/108958


More information about the llvm-commits mailing list