[llvm] r249523 - [EarlyCSE] Fix handling of target memory intrinsics for CSE'ing loads.
Arnaud A. de Grandmaison via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 7 00:41:30 PDT 2015
Author: aadg
Date: Wed Oct 7 02:41:29 2015
New Revision: 249523
URL: http://llvm.org/viewvc/llvm-project?rev=249523&view=rev
Log:
[EarlyCSE] Fix handling of target memory intrinsics for CSE'ing loads.
Summary:
Some target intrinsics can access multiple elements, using the pointer as a
base address (e.g. AArch64 ld4). When trying to CSE such instructions, we
must check that the available value comes from a compatible instruction,
because the pointer alone is not enough to discriminate whether the value
is correct.
Reviewers: ssijaric
Subscribers: mcrosier, llvm-commits, aemerson
Differential Revision: http://reviews.llvm.org/D13475
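To make the idea concrete, here is a minimal standalone sketch of the check
this patch adds, with illustrative names only (it is not the actual EarlyCSE
code): available loads stay keyed by their base pointer, but each cached
entry also records a matching id describing the kind of access that produced
it, and a lookup only hits when both agree.

  // Minimal sketch: pointer-keyed cache of available loads, plus a matching
  // id that must agree before a cached value may be reused.
  #include <cstdio>
  #include <map>

  struct AvailableValue {
    int Value;       // stands in for the cached SSA value
    int MatchingId;  // -1 for ordinary loads/stores, >= 0 for target intrinsics
  };

  // Keyed by the base pointer only; the matching id is checked at lookup time.
  static std::map<const void *, AvailableValue> AvailableLoads;

  // Returns true (and the cached value) only for a compatible earlier access.
  static bool lookupAvailable(const void *Ptr, int MatchingId, int &Out) {
    auto It = AvailableLoads.find(Ptr);
    if (It == AvailableLoads.end() || It->second.MatchingId != MatchingId)
      return false;
    Out = It->second.Value;
    return true;
  }

  int main() {
    int X = 42;
    // A plain store makes a value available for this pointer with id -1.
    AvailableLoads[&X] = {42, /*MatchingId=*/-1};

    int Cached;
    // A multi-element access (id 0, say) uses the same pointer but is not
    // compatible, so it must not reuse the stored value.
    if (!lookupAvailable(&X, /*MatchingId=*/0, Cached))
      std::printf("ld4-style access: no reuse, the load must stay\n");

    // A later plain load with the same pointer and id -1 may reuse it.
    if (lookupAvailable(&X, /*MatchingId=*/-1, Cached))
      std::printf("plain load: reused cached value %d\n", Cached);
    return 0;
  }

This mirrors the shape of the patch below: LoadValue carries the extra
matchingId field, and the lookup in processNode compares it against the id of
the instruction being considered for CSE.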
Added:
llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp?rev=249523&r1=249522&r2=249523&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp Wed Oct 7 02:41:29 2015
@@ -290,12 +290,19 @@ public:
/// current generation count. The current generation count is incremented
/// after every possibly writing memory operation, which ensures that we only
/// CSE loads with other loads that have no intervening store.
- typedef RecyclingAllocator<
- BumpPtrAllocator,
- ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+ struct LoadValue {
+ Value *data;
+ unsigned generation;
+ int matchingId;
+ LoadValue() : data(nullptr), generation(0), matchingId(-1) {}
+ LoadValue(Value *data, unsigned generation, unsigned matchingId)
+ : data(data), generation(generation), matchingId(matchingId) {}
+ };
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<Value *, LoadValue>>
LoadMapAllocator;
- typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
- DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+ typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+ LoadMapAllocator> LoadHTType;
LoadHTType AvailableLoads;
/// \brief A scoped hash table of the current values of read-only call
@@ -560,13 +567,13 @@ bool EarlyCSE::processNode(DomTreeNode *
// If we have an available version of this load, and if it is the right
// generation, replace this instruction.
- std::pair<Value *, unsigned> InVal =
- AvailableLoads.lookup(MemInst.getPtr());
- if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
- Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+ LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr());
+ if (InVal.data != nullptr && InVal.generation == CurrentGeneration &&
+ InVal.matchingId == MemInst.getMatchingId()) {
+ Value *Op = getOrCreateResult(InVal.data, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
- << " to: " << *InVal.first << '\n');
+ << " to: " << *InVal.data << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
Inst->eraseFromParent();
@@ -577,8 +584,9 @@ bool EarlyCSE::processNode(DomTreeNode *
}
// Otherwise, remember that we have this instruction.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
+ AvailableLoads.insert(
+ MemInst.getPtr(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
LastStore = nullptr;
continue;
}
@@ -652,8 +660,9 @@ bool EarlyCSE::processNode(DomTreeNode *
// version of the pointer. It is safe to forward from volatile stores
// to non-volatile loads, so we don't have to check for volatility of
// the store.
- AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
- Inst, CurrentGeneration));
+ AvailableLoads.insert(
+ MemInst.getPtr(),
+ LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
// Remember that this was the last store we saw for DSE.
if (!MemInst.isVolatile())
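For context, the MemInst.getMatchingId() calls in the hunks above are expected
to return -1 for ordinary loads and stores and a non-negative, target-specific
id for memory intrinsics such as ld4; the ParseMemoryInst implementation itself
is not part of this diff, so the following is only a hypothetical sketch of
that contract, with made-up names:

  // Hypothetical sketch of the matching-id contract assumed above (made-up
  // names, not the real ParseMemoryInst code).  Plain loads and stores all
  // share the id -1, so they keep CSE'ing with each other exactly as before;
  // a target memory intrinsic gets a non-negative id and therefore only
  // matches an available value produced by the same kind of intrinsic.
  enum class AccessKind { PlainLoad, PlainStore, TargetIntrinsic };

  struct MemAccess {
    AccessKind Kind;
    int IntrinsicId; // meaningful only when Kind == TargetIntrinsic
  };

  static int getMatchingIdFor(const MemAccess &MA) {
    if (MA.Kind == AccessKind::TargetIntrinsic)
      return MA.IntrinsicId; // e.g. a distinct id per ldN/stN flavour
    return -1;               // all plain loads/stores are mutually compatible
  }

  int main() {
    MemAccess Ld4 = {AccessKind::TargetIntrinsic, /*IntrinsicId=*/0};
    MemAccess Plain = {AccessKind::PlainLoad, /*IntrinsicId=*/0};
    // The differing ids (-1 vs. 0) are what block the bad store-to-ld4
    // forwarding exercised by the new test below.
    return getMatchingIdFor(Ld4) == getMatchingIdFor(Plain) ? 1 : 0;
  }

Under that assumption, the default-constructed LoadValue (null data pointer,
matchingId of -1) leaves existing load/store CSE behaviour unchanged; only
accesses that report a specific id gain the extra compatibility check.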
Added: llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll?rev=249523&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll (added)
+++ llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll Wed Oct 7 02:41:29 2015
@@ -0,0 +1,18 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>*)
+
+; Although the store and the ld4 are using the same pointer, the
+; data can not be reused because ld4 accesses multiple elements.
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @foo() {
+entry:
+ store <4 x i16> undef, <4 x i16>* undef, align 8
+ %0 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* undef)
+ ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %0
+; CHECK-LABEL: @foo(
+; CHECK: store
+; CHECK-NEXT: call
+; CHECK-NEXT: ret
+}