[llvm] r242069 - Reduce memory usage of ComputeEditDistance() by (almost) 50%

Nico Weber nicolasweber at gmx.de
Mon Jul 13 14:33:21 PDT 2015


Author: nico
Date: Mon Jul 13 16:33:21 2015
New Revision: 242069

URL: http://llvm.org/viewvc/llvm-project?rev=242069&view=rev
Log:
Reduce memory usage of ComputeEditDistance() by (almost) 50%

ComputeEditDistance() currently keeps two rows of the edit distance matrix in
memory.  That's unnecessary: one row plus one additional element is sufficient.
With this change, strings of up to 63 elements (n + 1 <= 64 buffer slots) can be
processed without going to the heap, compared to 31 elements previously.  (But
the main motivation is that the code gets a bit simpler.)

No intended behavior change.

Modified:
    llvm/trunk/include/llvm/ADT/edit_distance.h

Modified: llvm/trunk/include/llvm/ADT/edit_distance.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ADT/edit_distance.h?rev=242069&r1=242068&r2=242069&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ADT/edit_distance.h (original)
+++ llvm/trunk/include/llvm/ADT/edit_distance.h Mon Jul 13 16:33:21 2015
@@ -50,50 +50,51 @@ unsigned ComputeEditDistance(ArrayRef<T>
   //   http://en.wikipedia.org/wiki/Levenshtein_distance
   //
   // Although the algorithm is typically described using an m x n
-  // array, only two rows are used at a time, so this implementation
-  // just keeps two separate vectors for those two rows.
+  // array, only one row plus one element are used at a time, so this
+  // implementation just keeps one vector for the row.  To update one entry,
+  // only the entries to the left, top, and top-left are needed.  The left
+  // entry is in Row[x-1], the top entry is what's in Row[x] from the last
+  // iteration, and the top-left entry is stored in Previous.
   typename ArrayRef<T>::size_type m = FromArray.size();
   typename ArrayRef<T>::size_type n = ToArray.size();
 
   const unsigned SmallBufferSize = 64;
   unsigned SmallBuffer[SmallBufferSize];
   std::unique_ptr<unsigned[]> Allocated;
-  unsigned *Previous = SmallBuffer;
-  if (2*(n + 1) > SmallBufferSize) {
-    Previous = new unsigned [2*(n+1)];
-    Allocated.reset(Previous);
+  unsigned *Row = SmallBuffer;
+  if (n + 1 > SmallBufferSize) {
+    Row = new unsigned[n + 1];
+    Allocated.reset(Row);
   }
-  unsigned *Current = Previous + (n + 1);
 
-  for (unsigned i = 0; i <= n; ++i)
-    Previous[i] = i;
+  for (unsigned i = 1; i <= n; ++i)
+    Row[i] = i;
 
   for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) {
-    Current[0] = y;
-    unsigned BestThisRow = Current[0];
+    Row[0] = y;
+    unsigned BestThisRow = Row[0];
 
+    unsigned Previous = y - 1;
     for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
+      int OldRow = Row[x];
       if (AllowReplacements) {
-        Current[x] = std::min(
-            Previous[x-1] + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
-            std::min(Current[x-1], Previous[x])+1);
+        Row[x] = std::min(
+            Previous + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
+            std::min(Row[x-1], Row[x])+1);
       }
       else {
-        if (FromArray[y-1] == ToArray[x-1]) Current[x] = Previous[x-1];
-        else Current[x] = std::min(Current[x-1], Previous[x]) + 1;
+        if (FromArray[y-1] == ToArray[x-1]) Row[x] = Previous;
+        else Row[x] = std::min(Row[x-1], Row[x]) + 1;
       }
-      BestThisRow = std::min(BestThisRow, Current[x]);
+      Previous = OldRow;
+      BestThisRow = std::min(BestThisRow, Row[x]);
     }
 
     if (MaxEditDistance && BestThisRow > MaxEditDistance)
       return MaxEditDistance + 1;
-
-    unsigned *tmp = Current;
-    Current = Previous;
-    Previous = tmp;
   }
 
-  unsigned Result = Previous[n];
+  unsigned Result = Row[n];
   return Result;
 }
 





More information about the llvm-commits mailing list