[llvm] r307148 - [AsmParser] Mnemonic Spell Corrector

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 5 05:39:14 PDT 2017


Author: sjoerdmeijer
Date: Wed Jul  5 05:39:13 2017
New Revision: 307148

URL: http://llvm.org/viewvc/llvm-project?rev=307148&view=rev
Log:
[AsmParser] Mnemonic Spell Corrector

This implements suggesting other mnemonics when an invalid one is specified,
for example:

$ echo "adXd r1,r2,#3" | llvm-mc -triple arm
<stdin>:1:1: error: invalid instruction, did you mean: add, qadd?
adXd r1,r2,#3
^

The implementation is target agnostic, but as a first step I have added it only
to the ARM backend; so the ARM backend is a good example if someone wants to
enable this too for another target.

Differential Revision: https://reviews.llvm.org/D33128

Added:
    llvm/trunk/test/MC/ARM/invalid-instructions-spellcheck.s
Modified:
    llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
    llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp

Modified: llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp?rev=307148&r1=307147&r2=307148&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/ARM/AsmParser/ARMAsmParser.cpp Wed Jul  5 05:39:13 2017
@@ -8992,6 +8992,8 @@ unsigned ARMAsmParser::MatchInstruction(
   return PlainMatchResult;
 }
 
+std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS);
+
 static const char *getSubtargetFeatureName(uint64_t Val);
 bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                            OperandVector &Operands,
@@ -9085,9 +9087,13 @@ bool ARMAsmParser::MatchAndEmitInstructi
 
     return Error(ErrorLoc, "invalid operand for instruction");
   }
-  case Match_MnemonicFail:
-    return Error(IDLoc, "invalid instruction",
+  case Match_MnemonicFail: {
+    uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
+    std::string Suggestion = ARMMnemonicSpellCheck(
+      ((ARMOperand &)*Operands[0]).getToken(), FBS);
+    return Error(IDLoc, "invalid instruction" + Suggestion,
                  ((ARMOperand &)*Operands[0]).getLocRange());
+  }
   case Match_RequiresNotITBlock:
     return Error(IDLoc, "flag setting instruction only valid outside IT block");
   case Match_RequiresITBlock:

Added: llvm/trunk/test/MC/ARM/invalid-instructions-spellcheck.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/ARM/invalid-instructions-spellcheck.s?rev=307148&view=auto
==============================================================================
--- llvm/trunk/test/MC/ARM/invalid-instructions-spellcheck.s (added)
+++ llvm/trunk/test/MC/ARM/invalid-instructions-spellcheck.s Wed Jul  5 05:39:13 2017
@@ -0,0 +1,68 @@
+@ RUN: not llvm-mc -triple=arm -show-encoding < %s 2>&1 | FileCheck %s
+@ RUN: not llvm-mc -triple=thumb -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMB
+
+@ This tests the mnemonic spell checker.
+
+@ First check what happens when an instruction is omitted:
+
+  r1, r2, r3
+
+@ CHECK:      error: unexpected token in operand
+@ CHECK-NEXT: r1, r2, r3
+@ CHECK-NEXT:   ^
+
+@ We don't want to see a suggestion here; the edit distance is too large to
+@ give sensible suggestions:
+
+  aaaaaaaaaaaaaaa r1, r2, r3
+
+@ CHECK:      error: invalid instruction
+@ CHECK-NEXT: aaaaaaaaaaaaaaa r1, r2, r3
+@ CHECK-NEXT: ^
+
+@ Check that we get one suggestion: 'pushh' is 1 edit away, i.e. a deletion.
+
+  pushh r1, r2, r3
+
+@CHECK:      error: invalid instruction, did you mean: push?
+@CHECK-NEXT: pushh r1, r2, r3
+@CHECK-NEXT: ^
+
+  adXd r1, r2, r3
+
+@ Check edit distance 1 and 2: 'add' has edit distance of 1 (a deletion),
+@ and 'qadd' a distance of 2 (a deletion and an insertion)
+
+@ CHECK:      error: invalid instruction, did you mean: add, qadd?
+@ CHECK-NEXT: adXd r1, r2, r3
+@ CHECK-NEXT: ^
+
+@ Check edit distance 1 and 2, just insertions:
+
+  ad r1, r2, r3
+
+@ CHECK:      error: invalid instruction, did you mean: adc, add, adr, and, qadd?
+@ CHECK-NEXT: ad r1, r2, r3
+@ CHECK-NEXT: ^
+
+@ Check an instruction that is 2 edits away, and also has a lot of candidates:
+
+  ldre r1, r2, r3
+
+@ CHECK:      error: invalid instruction, did you mean: ldr, ldrb, ldrd, ldrex, ldrexb, ldrexd, ldrexh, ldrh, ldrt?
+@ CHECK-NEXT: ldre r1, r2, r3
+@ CHECK-NEXT: ^
+
+@ Here it is checked that we don't suggest instructions that are not supported.
+@ For example, in Thumb mode we don't want to see suggestions 'faddd' or 'qadd'
+@ because they are not supported.
+
+  fadd r1, r2, r3
+
+@ CHECK-THUMB: error: invalid instruction, did you mean: add?
+@ CHECK-THUMB: fadd r1, r2, r3
+@ CHECK-THUMB: ^
+
+@ CHECK:      error: invalid instruction, did you mean: add, qadd?
+@ CHECK-NEXT: fadd r1, r2, r3
+@ CHECK-NEXT: ^

Modified: llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp?rev=307148&r1=307147&r2=307148&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/AsmMatcherEmitter.cpp Wed Jul  5 05:39:13 2017
@@ -2711,6 +2711,47 @@ static void emitCustomOperandParsing(raw
   OS << "}\n\n";
 }
 
+/// Emit the <Target>MnemonicSpellCheck(S, FBS) helper into the matcher output.
+/// With VariantCount == 0 the emitted function just returns "". Otherwise it
+/// walks MatchTable0, skips entries whose RequiredFeatures are not all in FBS,
+/// and collects mnemonics within edit distance 2 of S, returning them as
+/// ", did you mean: a, b?" (or "" when there are no candidates).
+static void emitMnemonicSpellChecker(raw_ostream &OS, CodeGenTarget &Target,
+                                     unsigned VariantCount) {
+  OS << "std::string " << Target.getName()
+     << "MnemonicSpellCheck(StringRef S, uint64_t FBS) {\n";
+  if (!VariantCount)
+    OS << "  return \"\";\n";
+  else {
+    OS << "  const unsigned MaxEditDist = 2;\n";
+    OS << "  std::vector<StringRef> Candidates;\n";
+    OS << "  StringRef Prev = \"\";\n";
+    OS << "  auto End = std::end(MatchTable0);\n\n";
+    OS << "  for (auto I = std::begin(MatchTable0); I < End; I++) {\n";
+    OS << "    // Ignore unsupported instructions.\n";
+    OS << "    if ((FBS & I->RequiredFeatures) != I->RequiredFeatures)\n";
+    OS << "      continue;\n\n";
+    OS << "    StringRef T = I->getMnemonic();\n";
+    OS << "    // Avoid recomputing the edit distance for the same string.\n";
+    OS << "    if (T.equals(Prev))\n";
+    OS << "      continue;\n\n";
+    OS << "    Prev = T;\n";
+    OS << "    unsigned Dist = S.edit_distance(T, false, MaxEditDist);\n";
+    OS << "    if (Dist <= MaxEditDist)\n";
+    OS << "      Candidates.push_back(T);\n";
+    OS << "  }\n\n";
+    OS << "  if (Candidates.empty())\n";
+    OS << "    return \"\";\n\n";
+    OS << "  std::string Res = \", did you mean: \";\n";
+    OS << "  unsigned i = 0;\n";
+    OS << "  for( ; i < Candidates.size() - 1; i++)\n";
+    OS << "    Res += Candidates[i].str() + \", \";\n";
+    OS << "  return Res + Candidates[i].str() + \"?\";\n";
+  }
+  OS << "}\n\n";
+}
+
+
 void AsmMatcherEmitter::run(raw_ostream &OS) {
   CodeGenTarget Target(Records);
   Record *AsmParser = Target.getAsmParser();
@@ -2974,6 +3015,8 @@ void AsmMatcherEmitter::run(raw_ostream
     OS << "};\n\n";
   }
 
+  emitMnemonicSpellChecker(OS, Target, VariantCount);
+
   // Finally, build the match function.
   OS << "unsigned " << Target.getName() << ClassName << "::\n"
      << "MatchInstructionImpl(const OperandVector &Operands,\n";




More information about the llvm-commits mailing list