[llvm] [TableGen] Rework error reporting for duplicate Feature/Processor. (PR #102257)

Wed Aug 7 11:42:17 PDT 2024

https://github.com/jurahul updated https://github.com/llvm/llvm-project/pull/102257

>From f3954a120f591e056f7061b8a60bfebd247d0e54 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <rjoshi at nvidia.com>
Date: Tue, 6 Aug 2024 17:04:28 -0700
Subject: [PATCH] [TableGen] Rework error reporting for duplicate
 Feature/Processor.

- Extract code for sorting and checking duplicate Records into a helper
  function and update `collectProcModels` to use the helper.
- Update `FeatureKeyValues` to:
  (a) Remove code for duplicate checks and use the helper.
  (b) Trim features with empty name explicitly to be able to use
      the helper.
- Make the sorting deterministic by using record name as a secondary
  key for sorting, and re-enable SubtargetFeatureUniqueNames.td test
  that was disabled due to the non-determinism of the error messages.
- Change wording of error message when duplicate records are found to
  be source code position agnostic, since `First` may not be before
  `Second` lexically.
---
 llvm/include/llvm/TableGen/Record.h           |  3 +-
 llvm/test/TableGen/ProcessorUniqueNames.td    | 12 +++++
 .../TableGen/SubtargetFeatureUniqueNames.td   |  9 ++--
 llvm/utils/TableGen/Common/CMakeLists.txt     |  1 +
 .../utils/TableGen/Common/CodeGenSchedule.cpp | 15 ++----
 llvm/utils/TableGen/Common/Types.h            |  6 +--
 llvm/utils/TableGen/Common/Utils.cpp          | 48 +++++++++++++++++++
 llvm/utils/TableGen/Common/Utils.h            | 25 ++++++++++
 llvm/utils/TableGen/SubtargetEmitter.cpp      | 43 +++++++----------
 9 files changed, 115 insertions(+), 47 deletions(-)
 create mode 100644 llvm/test/TableGen/ProcessorUniqueNames.td
 create mode 100644 llvm/utils/TableGen/Common/Utils.cpp
 create mode 100644 llvm/utils/TableGen/Common/Utils.h

diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 001c2bb4670d3..e6e87eee2c6ba 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -2112,8 +2112,7 @@ struct LessRecordByID {
   }
 };
 
-/// Sorting predicate to sort record pointers by their
-/// name field.
+/// Sorting predicate to sort record pointers by their Name field.
 struct LessRecordFieldName {
   bool operator()(const Record *Rec1, const Record *Rec2) const {
     return Rec1->getValueAsString("Name") < Rec2->getValueAsString("Name");
diff --git a/llvm/test/TableGen/ProcessorUniqueNames.td b/llvm/test/TableGen/ProcessorUniqueNames.td
new file mode 100644
index 0000000000000..8fdc6c42b857d
--- /dev/null
+++ b/llvm/test/TableGen/ProcessorUniqueNames.td
@@ -0,0 +1,12 @@
+// RUN: not llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s -DFILE=%s
+// Verify that processors with same names result in an error.
+
+include "llvm/Target/Target.td"
+
+def MyTarget : Target;
+
+def ProcessorB : ProcessorModel<"NameA", NoSchedModel, []>;
+
+// CHECK: [[FILE]]:[[@LINE+2]]:5: error: Processor `NameA` is already defined.
+// CHECK: [[FILE]]:[[@LINE-3]]:5: note: Previous definition here.
+def ProcessorA : ProcessorModel<"NameA", NoSchedModel, []>;
diff --git a/llvm/test/TableGen/SubtargetFeatureUniqueNames.td b/llvm/test/TableGen/SubtargetFeatureUniqueNames.td
index a322d25fc1def..0ee708e2afaa2 100644
--- a/llvm/test/TableGen/SubtargetFeatureUniqueNames.td
+++ b/llvm/test/TableGen/SubtargetFeatureUniqueNames.td
@@ -1,6 +1,3 @@
-// Temporarily disable test due to non-deterministic order of error messages.
-// UNSUPPORTED: target={{.*}}
-
 // RUN: not llvm-tblgen -gen-subtarget -I %p/../../include %s 2>&1 | FileCheck %s -DFILE=%s
 // Verify that subtarget features with same names result in an error.
 
@@ -8,8 +5,8 @@ include "llvm/Target/Target.td"
 
 def MyTarget : Target;
 
-def FeatureA : SubtargetFeature<"NameA", "", "", "">;
+def FeatureB : SubtargetFeature<"NameA", "", "", "">;
 
-// CHECK: [[FILE]]:[[@LINE+2]]:5: error: Feature `NameA` already defined.
+// CHECK: [[FILE]]:[[@LINE+2]]:5: error: Feature `NameA` is already defined.
 // CHECK: [[FILE]]:[[@LINE-3]]:5: note: Previous definition here.
-def FeatureB : SubtargetFeature<"NameA", "", "", "">;
+def FeatureA : SubtargetFeature<"NameA", "", "", "">;
diff --git a/llvm/utils/TableGen/Common/CMakeLists.txt b/llvm/utils/TableGen/Common/CMakeLists.txt
index 13883aa8fa391..ce5092b6962ba 100644
--- a/llvm/utils/TableGen/Common/CMakeLists.txt
+++ b/llvm/utils/TableGen/Common/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_library(LLVMTableGenCommon STATIC OBJECT EXCLUDE_FROM_ALL
   PredicateExpander.cpp
   SubtargetFeatureInfo.cpp
   Types.cpp
+  Utils.cpp
   VarLenCodeEmitterGen.cpp
 
   LINK_LIBS
diff --git a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp
index 152e68797b991..6386cc8eb32db 100644
--- a/llvm/utils/TableGen/Common/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenSchedule.cpp
@@ -14,6 +14,7 @@
 #include "CodeGenSchedule.h"
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
+#include "Utils.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -533,17 +534,9 @@ void CodeGenSchedModels::collectOptionalProcessorInfo() {
 /// Gather all processor models.
 void CodeGenSchedModels::collectProcModels() {
   RecVec ProcRecords = Records.getAllDerivedDefinitions("Processor");
-  llvm::sort(ProcRecords, LessRecordFieldName());
-
-  // Check for duplicated names.
-  auto I = std::adjacent_find(ProcRecords.begin(), ProcRecords.end(),
-                              [](const Record *Rec1, const Record *Rec2) {
-                                return Rec1->getValueAsString("Name") ==
-                                       Rec2->getValueAsString("Name");
-                              });
-  if (I != ProcRecords.end())
-    PrintFatalError((*I)->getLoc(), "Duplicate processor name " +
-                                        (*I)->getValueAsString("Name"));
+
+  // Sort and check duplicate Processor name.
+  sortAndReportDuplicates(ProcRecords, "Processor");
 
   // Reserve space because we can. Reallocation would be ok.
   ProcModels.reserve(ProcRecords.size() + 1);
diff --git a/llvm/utils/TableGen/Common/Types.h b/llvm/utils/TableGen/Common/Types.h
index 74f0f9f2792c4..073b91f8ab28a 100644
--- a/llvm/utils/TableGen/Common/Types.h
+++ b/llvm/utils/TableGen/Common/Types.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_UTILS_TABLEGEN_TYPES_H
-#define LLVM_UTILS_TABLEGEN_TYPES_H
+#ifndef LLVM_UTILS_TABLEGEN_COMMON_TYPES_H
+#define LLVM_UTILS_TABLEGEN_COMMON_TYPES_H
 
 #include <cstdint>
 
@@ -18,4 +18,4 @@ namespace llvm {
 const char *getMinimalTypeForRange(uint64_t Range, unsigned MaxSize = 64);
 } // namespace llvm
 
-#endif
+#endif // LLVM_UTILS_TABLEGEN_COMMON_TYPES_H
diff --git a/llvm/utils/TableGen/Common/Utils.cpp b/llvm/utils/TableGen/Common/Utils.cpp
new file mode 100644
index 0000000000000..29b5120bcf511
--- /dev/null
+++ b/llvm/utils/TableGen/Common/Utils.cpp
@@ -0,0 +1,48 @@
+//===- Utils.cpp - Common Utilities -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Utils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include <algorithm>
+
+using namespace llvm;
+
+namespace {
+/// Sorting predicate to sort record pointers by their Name field, and break
+/// ties using record ID (which corresponds to creation/parse order).
+struct LessRecordFieldNameAndID {
+  bool operator()(const Record *Rec1, const Record *Rec2) const {
+    return std::tuple(Rec1->getValueAsString("Name"), Rec1->getID()) <
+           std::tuple(Rec2->getValueAsString("Name"), Rec2->getID());
+  }
+};
+} // End anonymous namespace
+
+/// Sort an array of Records on the "Name" field, and check for records with
+/// duplicate "Name" field. If duplicates are found, report a fatal error.
+void llvm::sortAndReportDuplicates(MutableArrayRef<Record *> Records,
+                                   StringRef ObjectName) {
+  llvm::sort(Records, LessRecordFieldNameAndID());
+
+  auto I = std::adjacent_find(Records.begin(), Records.end(),
+                              [](const Record *Rec1, const Record *Rec2) {
+                                return Rec1->getValueAsString("Name") ==
+                                       Rec2->getValueAsString("Name");
+                              });
+  if (I == Records.end())
+    return;
+
+  // Found a duplicate name.
+  const Record *First = *I;
+  const Record *Second = *(I + 1);
+  StringRef Name = First->getValueAsString("Name");
+  PrintError(Second, ObjectName + " `" + Name + "` is already defined.");
+  PrintFatalNote(First, "Previous definition here.");
+}
diff --git a/llvm/utils/TableGen/Common/Utils.h b/llvm/utils/TableGen/Common/Utils.h
new file mode 100644
index 0000000000000..522f541c2d698
--- /dev/null
+++ b/llvm/utils/TableGen/Common/Utils.h
@@ -0,0 +1,25 @@
+//===- Utils.h - Common Utilities -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_COMMON_UTILS_H
+#define LLVM_UTILS_TABLEGEN_COMMON_UTILS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+class Record;
+
+/// Sort an array of Records on the "Name" field, and check for records with
+/// duplicate "Name" field. If duplicates are found, report a fatal error.
+void sortAndReportDuplicates(MutableArrayRef<Record *> Records,
+                             StringRef ObjectName);
+
+} // namespace llvm
+
+#endif // LLVM_UTILS_TABLEGEN_COMMON_UTILS_H
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 163a9dc947060..66ca38ee5ae2f 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -14,6 +14,7 @@
 #include "Common/CodeGenSchedule.h"
 #include "Common/CodeGenTarget.h"
 #include "Common/PredicateExpander.h"
+#include "Common/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -250,34 +251,30 @@ void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) {
 //
 unsigned SubtargetEmitter::FeatureKeyValues(
     raw_ostream &OS, const DenseMap<Record *, unsigned> &FeatureMap) {
-  // Gather and sort all the features
   std::vector<Record *> FeatureList =
       Records.getAllDerivedDefinitions("SubtargetFeature");
 
+  // Remove features with empty name.
+  llvm::erase_if(FeatureList, [](const Record *Rec) {
+    return Rec->getValueAsString("Name").empty();
+  });
   if (FeatureList.empty())
     return 0;
 
-  llvm::sort(FeatureList, LessRecordFieldName());
-
-  // Check that there are no duplicate features.
-  DenseMap<StringRef, const Record *> UniqueFeatures;
+  // Sort and check duplicate Feature name.
+  sortAndReportDuplicates(FeatureList, "Feature");
 
-  // Begin feature table
+  // Begin feature table.
   OS << "// Sorted (by key) array of values for CPU features.\n"
      << "extern const llvm::SubtargetFeatureKV " << Target
      << "FeatureKV[] = {\n";
 
-  // For each feature
-  unsigned NumFeatures = 0;
   for (const Record *Feature : FeatureList) {
     // Next feature
     StringRef Name = Feature->getName();
     StringRef CommandLineName = Feature->getValueAsString("Name");
     StringRef Desc = Feature->getValueAsString("Desc");
 
-    if (CommandLineName.empty())
-      continue;
-
     // Emit as { "feature", "description", { featureEnum }, { i1 , i2 , ... , in
     // } }
     OS << "  { "
@@ -289,20 +286,12 @@ unsigned SubtargetEmitter::FeatureKeyValues(
     printFeatureMask(OS, ImpliesList, FeatureMap);
 
     OS << " },\n";
-    ++NumFeatures;
-
-    auto [It, Inserted] = UniqueFeatures.insert({CommandLineName, Feature});
-    if (!Inserted) {
-      PrintError(Feature, "Feature `" + CommandLineName + "` already defined.");
-      const Record *Previous = It->second;
-      PrintFatalNote(Previous, "Previous definition here.");
-    }
   }
 
-  // End feature table
+  // End feature table.
   OS << "};\n";
 
-  return NumFeatures;
+  return FeatureList.size();
 }
 
 //
@@ -317,18 +306,22 @@ SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
       Records.getAllDerivedDefinitions("Processor");
   llvm::sort(ProcessorList, LessRecordFieldName());
 
-  // Begin processor table
+  // Note that unlike `FeatureKeyValues`, here we do not need to check for
+  // duplicate processors, since that is already done when the SubtargetEmitter
+  // constructor calls `getSchedModels` to build a `CodeGenSchedModels` object,
+  // which does the duplicate processor check.
+
+  // Begin processor table.
   OS << "// Sorted (by key) array of values for CPU subtype.\n"
      << "extern const llvm::SubtargetSubTypeKV " << Target
      << "SubTypeKV[] = {\n";
 
-  // For each processor
   for (Record *Processor : ProcessorList) {
     StringRef Name = Processor->getValueAsString("Name");
     RecVec FeatureList = Processor->getValueAsListOfDefs("Features");
     RecVec TuneFeatureList = Processor->getValueAsListOfDefs("TuneFeatures");
 
-    // Emit as { "cpu", "description", 0, { f1 , f2 , ... fn } },
+    // Emit as "{ "cpu", "description", 0, { f1 , f2 , ... fn } },".
     OS << " { "
        << "\"" << Name << "\", ";
 
@@ -342,7 +335,7 @@ SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
     OS << ", &" << ProcModelName << " },\n";
   }
 
-  // End processor table
+  // End processor table.
   OS << "};\n";
 
   return ProcessorList.size();