[Mlir-commits] [mlir] efb7727 - [mlir] Flag near misses in file splitting

Sun Dec 12 08:03:45 PST 2021

Author: Jacques Pienaar
Date: 2021-12-12T08:03:30-08:00
New Revision: efb7727a96ca8c02cc44ad83eea7c126db191d2b

URL: https://github.com/llvm/llvm-project/commit/efb7727a96ca8c02cc44ad83eea7c126db191d2b
DIFF: https://github.com/llvm/llvm-project/commit/efb7727a96ca8c02cc44ad83eea7c126db191d2b.diff

LOG: [mlir] Flag near misses in file splitting

Flags some potential cases where splitting isn't happening and so could result
in confusing results. Also update some test files where there were near misses
in splitting that seemed unintentional.

Differential Revision: https://reviews.llvm.org/D109636

Added: 
    mlir/test/mlir-opt/nearmiss.mlir

Modified: 
    mlir/lib/Support/ToolUtilities.cpp
    mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
    mlir/test/Conversion/TosaToSCF/tosa-to-scf.mlir
    mlir/test/Dialect/OpenACC/canonicalize.mlir
    mlir/test/Dialect/SCF/canonicalize.mlir
    mlir/test/Dialect/Shape/canonicalize.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index ef1a41114fe9..ca88fef96c0e 100644

--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Support/LLVM.h"
 #include "mlir/Support/LogicalResult.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace mlir;
 
@@ -21,16 +22,55 @@ LogicalResult
 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
                             ChunkBufferHandler processChunkBuffer,
                             raw_ostream &os) {
-  const char splitMarker[] = "// -----";
+  const char splitMarkerConst[] = "// -----";
+  StringRef splitMarker(splitMarkerConst);
+  const int splitMarkerLen = splitMarker.size();
 
   auto *origMemBuffer = originalBuffer.get();
-  SmallVector<StringRef, 8> sourceBuffers;
-  origMemBuffer->getBuffer().split(sourceBuffers, splitMarker);
+  SmallVector<StringRef, 8> rawSourceBuffers;
+  const int checkLen = 2;
+  // Split dropping the last checkLen chars to enable flagging near misses.
+  origMemBuffer->getBuffer().split(rawSourceBuffers,
+                                   splitMarker.drop_back(checkLen));
+  if (rawSourceBuffers.empty())
+    return success();
 
   // Add the original buffer to the source manager.
   llvm::SourceMgr fileSourceMgr;
   fileSourceMgr.AddNewSourceBuffer(std::move(originalBuffer), llvm::SMLoc());
 
+  // Flag near misses by iterating over all the sub-buffers found when splitting
+  // with the prefix of the splitMarker. Use a sliding window where we only add
+  // a buffer as a sourceBuffer if terminated by a full match of the
+  // splitMarker, else flag a warning (if near miss) and extend the size of the
+  // buffer under consideration.
+  SmallVector<StringRef, 8> sourceBuffers;
+  StringRef prev;
+  for (auto buffer : rawSourceBuffers) {
+    if (prev.empty()) {
+      prev = buffer;
+      continue;
+    }
+
+    // Check that suffix is as expected and doesn't have any dash post.
+    bool expectedSuffix = buffer.startswith(splitMarker.take_back(checkLen)) &&
+                          buffer.size() > checkLen && buffer[checkLen] != '0';
+    if (expectedSuffix) {
+      sourceBuffers.push_back(prev);
+      prev = buffer.drop_front(checkLen);
+    } else {
+      // TODO: Consider making this a failure.
+      auto splitLoc = llvm::SMLoc::getFromPointer(buffer.data());
+      fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
+                                 llvm::SourceMgr::DK_Warning,
+                                 "near miss with file split marker");
+      prev = StringRef(prev.data(),
+                       prev.size() + splitMarkerLen - checkLen + buffer.size());
+    }
+  }
+  if (!prev.empty())
+    sourceBuffers.push_back(prev);
+
   // Process each chunk in turn.
   bool hadFailure = false;
   for (auto &subBuffer : sourceBuffers) {

diff  --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
index 49ef94d2a0fc..015cb2fcaaf4 100644
--- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
+++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
@@ -593,7 +593,7 @@ func @broadcast_3_shapes_different_extents(%a : tensor<2xindex>,
   return
 }
 
-// ----
+// -----
 
 // CHECK-LABEL: @broadcast_to_known_rank
 func @broadcast_to_known_rank(%a : tensor<1xindex>, %b : tensor<3xindex>)

diff  --git a/mlir/test/Conversion/TosaToSCF/tosa-to-scf.mlir b/mlir/test/Conversion/TosaToSCF/tosa-to-scf.mlir
index 82fa2c9f0bb5..d02e7cd8a4db 100644
--- a/mlir/test/Conversion/TosaToSCF/tosa-to-scf.mlir
+++ b/mlir/test/Conversion/TosaToSCF/tosa-to-scf.mlir
@@ -30,7 +30,7 @@ func @while_test(%arg0 : tensor<i32>) -> (tensor<i32>) {
   return %1 : tensor<i32>
 }
 
-// ----
+// -----
 
 // CHECK-LABEL: func @if_test
 // CHECK-SAME: ([[ARG0:%.+]]: tensor<f32>, [[ARG1:%.+]]: tensor<f32>, [[ARG2:%.+]]: tensor<i1>)

diff  --git a/mlir/test/Dialect/OpenACC/canonicalize.mlir b/mlir/test/Dialect/OpenACC/canonicalize.mlir
index 95f98dbac81e..8337f587f973 100644
--- a/mlir/test/Dialect/OpenACC/canonicalize.mlir
+++ b/mlir/test/Dialect/OpenACC/canonicalize.mlir
@@ -61,7 +61,7 @@ func @testupdateop(%a: memref<10xf32>) -> () {
 // CHECK: func @testupdateop
 // CHECK-NOT: acc.update
 
-// ----
+// -----
 
 func @testenterdataop(%a: memref<10xf32>, %ifCond: i1) -> () {
   acc.enter_data if(%ifCond) create(%a: memref<10xf32>)

diff  --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir
index 5c396f2b2118..8d90303777f1 100644
--- a/mlir/test/Dialect/SCF/canonicalize.mlir
+++ b/mlir/test/Dialect/SCF/canonicalize.mlir
@@ -250,7 +250,7 @@ func @empty_if2(%cond: i1) {
 // CHECK-NOT:       scf.if
 // CHECK:           return
 
-// ----
+// -----
 
 func @empty_else(%cond: i1, %v : memref<i1>) {
   scf.if %cond {

diff  --git a/mlir/test/Dialect/Shape/canonicalize.mlir b/mlir/test/Dialect/Shape/canonicalize.mlir
index 4a8839ba2b49..75b92640a998 100644
--- a/mlir/test/Dialect/Shape/canonicalize.mlir
+++ b/mlir/test/Dialect/Shape/canonicalize.mlir
@@ -1331,7 +1331,7 @@ func @cast_extent_tensor(%arg : tensor<*xf32>) -> tensor<3xindex> {
   return %1 : tensor<3xindex>
 }
 
-// ----
+// -----
 
 // CHECK-LABEL: max_same_arg
 // CHECK-SAME: (%[[SHAPE:.*]]: !shape.shape)
@@ -1341,7 +1341,7 @@ func @max_same_arg(%a: !shape.shape) -> !shape.shape {
   return %1 : !shape.shape
 }
 
-// ----
+// -----
 
 // CHECK-LABEL: min_same_arg
 // CHECK-SAME: (%[[SHAPE:.*]]: !shape.shape)
@@ -1350,7 +1350,7 @@ func @min_same_arg(%a: !shape.shape) -> !shape.shape {
   // CHECK: return %[[SHAPE]]
   return %1 : !shape.shape
 }
-// ----
+// -----
 
 // CHECK-LABEL: @cstr_broadcastable_folding
 func @cstr_broadcastable_folding(%arg : tensor<?x4xf32>) {

diff  --git a/mlir/test/mlir-opt/nearmiss.mlir b/mlir/test/mlir-opt/nearmiss.mlir
new file mode 100644
index 000000000000..f22abcc99dbe
--- /dev/null
+++ b/mlir/test/mlir-opt/nearmiss.mlir
@@ -0,0 +1,22 @@
+// RUN: mlir-opt --split-input-file --verify-diagnostics %s 2> %t &&  FileCheck --input-file %t %s
+// RUN: cat %t
+
+func @main() {return}
+
+// -----
+
+// expected-note @+1 {{see existing symbol definition here}}
+func @foo() { return }
+// CHECK: warning: near miss with file split marker
+// CHECK: ----
+// ----
+
+// expected-error @+1 {{redefinition of symbol named 'foo'}}
+func @foo() { return }
+// CHECK: warning: near miss with file split marker
+// CHECK: ----
+// ----
+func @bar2() {return }
+
+// No error flagged at the end for a near miss.
+// ----