[llvm] r249523 - [EarlyCSE] Fix handling of target memory intrinsics for CSE'ing loads.

Arnaud A. de Grandmaison via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 7 00:41:30 PDT 2015


Author: aadg
Date: Wed Oct  7 02:41:29 2015
New Revision: 249523

URL: http://llvm.org/viewvc/llvm-project?rev=249523&view=rev
Log:
[EarlyCSE] Fix handling of target memory intrinsics for CSE'ing loads.

Summary:
Some target intrinsics can access multiple elements, using the pointer as a
base address (e.g. AArch64 ld4). When trying to CSE such instructions,
it must be checked the available value comes from a compatible instruction
because the pointer is not enough to discriminate whether the value is
correct.

Reviewers: ssijaric

Subscribers: mcrosier, llvm-commits, aemerson

Differential Revision: http://reviews.llvm.org/D13475

Added:
    llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp?rev=249523&r1=249522&r2=249523&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/EarlyCSE.cpp Wed Oct  7 02:41:29 2015
@@ -290,12 +290,19 @@ public:
   /// current generation count.  The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
   /// CSE loads with other loads that have no intervening store.
-  typedef RecyclingAllocator<
-      BumpPtrAllocator,
-      ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+  struct LoadValue {
+    Value *data;
+    unsigned generation;
+    int matchingId;
+    LoadValue() : data(nullptr), generation(0), matchingId(-1) {}
+    LoadValue(Value *data, unsigned generation, unsigned matchingId)
+        : data(data), generation(generation), matchingId(matchingId) {}
+  };
+  typedef RecyclingAllocator<BumpPtrAllocator,
+                             ScopedHashTableVal<Value *, LoadValue>>
       LoadMapAllocator;
-  typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
-                          DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+                          LoadMapAllocator> LoadHTType;
   LoadHTType AvailableLoads;
 
   /// \brief A scoped hash table of the current values of read-only call
@@ -560,13 +567,13 @@ bool EarlyCSE::processNode(DomTreeNode *
 
       // If we have an available version of this load, and if it is the right
       // generation, replace this instruction.
-      std::pair<Value *, unsigned> InVal =
-          AvailableLoads.lookup(MemInst.getPtr());
-      if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
-        Value *Op = getOrCreateResult(InVal.first, Inst->getType());
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr());
+      if (InVal.data != nullptr && InVal.generation == CurrentGeneration &&
+          InVal.matchingId == MemInst.getMatchingId()) {
+        Value *Op = getOrCreateResult(InVal.data, Inst->getType());
         if (Op != nullptr) {
           DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
-                       << "  to: " << *InVal.first << '\n');
+                       << "  to: " << *InVal.data << '\n');
           if (!Inst->use_empty())
             Inst->replaceAllUsesWith(Op);
           Inst->eraseFromParent();
@@ -577,8 +584,9 @@ bool EarlyCSE::processNode(DomTreeNode *
       }
 
       // Otherwise, remember that we have this instruction.
-      AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
-                                                  Inst, CurrentGeneration));
+      AvailableLoads.insert(
+          MemInst.getPtr(),
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
       LastStore = nullptr;
       continue;
     }
@@ -652,8 +660,9 @@ bool EarlyCSE::processNode(DomTreeNode *
         // version of the pointer.  It is safe to forward from volatile stores
         // to non-volatile loads, so we don't have to check for volatility of
         // the store.
-        AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
-                                                    Inst, CurrentGeneration));
+        AvailableLoads.insert(
+            MemInst.getPtr(),
+            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
 
         // Remember that this was the last store we saw for DSE.
         if (!MemInst.isVolatile())

Added: llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll?rev=249523&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll (added)
+++ llvm/trunk/test/Transforms/EarlyCSE/AArch64/ldstN.ll Wed Oct  7 02:41:29 2015
@@ -0,0 +1,18 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>*)
+
+; Although the store and the ld4 are using the same pointer, the
+; data can not be reused because ld4 accesses multiple elements.
+define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @foo() {
+entry:
+  store <4 x i16> undef, <4 x i16>* undef, align 8
+  %0 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* undef)
+  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %0
+; CHECK-LABEL: @foo(
+; CHECK: store
+; CHECK-NEXT: call
+; CHECK-NEXT: ret
+}




More information about the llvm-commits mailing list