[llvm] [ARM] FIX: change `pkhtb` custom parsing produce NoMatch rather than Error (PR #85765)

Alfie Richards via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 19 04:52:14 PDT 2024


https://github.com/AlfieRichardsArm updated https://github.com/llvm/llvm-project/pull/85765

>From 61ee60c6bb5da85619789a5fb1b10f6a46cc2b87 Mon Sep 17 00:00:00 2001
From: Alfie Richards <alfie.richards at arm.com>
Date: Tue, 19 Mar 2024 10:02:43 +0000
Subject: [PATCH 1/4] [ARM] Fix PKHImm parsing.

This was broken by https://github.com/llvm/llvm-project/pull/83436 as in
optional operands meant sometimes the parsePKHImm parser is applied to
operands in other positions, which previously produced an error.

Now this instead fails the parse. However this unfortunately means
the default parsing happens which combines the shift and the base operator
which causes the diagnostics to be non-sense.

To fix this I had to add a special case diagnostic which is unfortunate.
Ideally these two methods for handling operands should be unified and the
TableGen matching mechanisms should be used for validaing parsed shifts rather
than special cases in parsing.
---
 .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 21 +++++++++++++++++--
 llvm/test/MC/ARM/basic-arm-instructions.s     |  4 ++++
 llvm/test/MC/ARM/diagnostics.s                |  4 ++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 026d4ca06e87b9..e21310c53f45a0 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -4318,6 +4318,23 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
     }
   }
 
+  auto MnemonicOp = static_cast<ARMOperand &>(*Operands[0]);
+  // These instructions handle shift operands as seperate operators to the
+  // register. In these cases doing the default conversion makes nonsense
+  // diagnostics.
+  // FIXME: Unify the different methods for handling shift operators
+  // and use TableGen matching mechanisms to do the validation.
+  static const DenseSet<StringRef> NoDefaultConvertSet{"pkhbt", "pkhtb"};
+  bool ShouldDefaultConvert =
+      (MnemonicOp.isToken() &&
+       !NoDefaultConvertSet.contains(MnemonicOp.getToken()));
+  if (!ShouldDefaultConvert) {
+    // If we get to this point the parsing for the shift operator in
+    // parsePKHLSLImm has failed. So we can generate a diagnostic here.
+    Error(S, "shift operator is malformed for this instruction");
+    return -1;
+  }
+
   if (ShiftReg && ShiftTy != ARM_AM::rrx)
     Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
                                                          ShiftReg, Imm,
@@ -5289,12 +5306,12 @@ ParseStatus ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op,
   MCAsmParser &Parser = getParser();
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
-    return Error(Parser.getTok().getLoc(), Op + " operand expected.");
+    return ParseStatus::NoMatch;
   StringRef ShiftName = Tok.getString();
   std::string LowerOp = Op.lower();
   std::string UpperOp = Op.upper();
   if (ShiftName != LowerOp && ShiftName != UpperOp)
-    return Error(Parser.getTok().getLoc(), Op + " operand expected.");
+    return ParseStatus::NoMatch;
   Parser.Lex(); // Eat shift type token.
 
   // There must be a '#' and a shift amount.
diff --git a/llvm/test/MC/ARM/basic-arm-instructions.s b/llvm/test/MC/ARM/basic-arm-instructions.s
index 055f3ce316153d..84a7cf52fa30e0 100644
--- a/llvm/test/MC/ARM/basic-arm-instructions.s
+++ b/llvm/test/MC/ARM/basic-arm-instructions.s
@@ -1774,6 +1774,9 @@ Lforward:
         pkhtb r2, r2, r3, asr #31
         pkhtb r2, r2, r3, asr #15
 
+        it ne
+        pkhtbne r2, r2, r3, asr #15
+
 @ CHECK: pkhbt	r2, r2, r3              @ encoding: [0x13,0x20,0x82,0xe6]
 @ CHECK: pkhbt	r2, r2, r3, lsl #31     @ encoding: [0x93,0x2f,0x82,0xe6]
 @ CHECK: pkhbt	r2, r2, r3              @ encoding: [0x13,0x20,0x82,0xe6]
@@ -1782,6 +1785,7 @@ Lforward:
 @ CHECK: pkhbt	r2, r3, r2              @ encoding: [0x12,0x20,0x83,0xe6]
 @ CHECK: pkhtb	r2, r2, r3, asr #31     @ encoding: [0xd3,0x2f,0x82,0xe6]
 @ CHECK: pkhtb	r2, r2, r3, asr #15     @ encoding: [0xd3,0x27,0x82,0xe6]
+@ CHECK: pkhtbne r2, r2, r3, asr #15    @ encoding: [0xd3,0x27,0x82,0x16]
 
 @------------------------------------------------------------------------------
 @ FIXME: PLD
diff --git a/llvm/test/MC/ARM/diagnostics.s b/llvm/test/MC/ARM/diagnostics.s
index fa23a7da1e4048..b6d011e5bdef8f 100644
--- a/llvm/test/MC/ARM/diagnostics.s
+++ b/llvm/test/MC/ARM/diagnostics.s
@@ -235,10 +235,10 @@
 @ CHECK-ERRORS: error: immediate value out of range
 @ CHECK-ERRORS:         pkhtb r2, r2, r3, asr #33
 @ CHECK-ERRORS:                                ^
-@ CHECK-ERRORS: error: lsl operand expected.
+@ CHECK-ERRORS: error: shift operator is malformed for this instruction
 @ CHECK-ERRORS:         pkhbt r2, r2, r3, asr #3
 @ CHECK-ERRORS:                           ^
-@ CHECK-ERRORS: error: asr operand expected.
+@ CHECK-ERRORS: error: shift operator is malformed for this instruction
 @ CHECK-ERRORS:         pkhtb r2, r2, r3, lsl #3
 @ CHECK-ERRORS:                           ^
 

>From b9957c4011f5e3c4df6230e4719ab7987758f8e6 Mon Sep 17 00:00:00 2001
From: Alfie Richards <alfie.richards at arm.com>
Date: Tue, 19 Mar 2024 11:12:46 +0000
Subject: [PATCH 2/4] Nit fix

---
 llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index e21310c53f45a0..cd5193cff2989e 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1943,7 +1943,7 @@ class ARMOperand : public MCParsedAsmOperand {
 
       // And be in the right range, depending on the amount that it is shifted
       // by.  Shift 0, is equal to 7 unsigned bits, the sign bit is set
-      // separately.
+      // ly.
       int64_t Range = (1U << (7 + shift)) - 1;
       return (Val == INT32_MIN) || (Val > -Range && Val < Range);
     }
@@ -4319,7 +4319,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
   }
 
   auto MnemonicOp = static_cast<ARMOperand &>(*Operands[0]);
-  // These instructions handle shift operands as seperate operators to the
+  // These instructions handle shift operands as separate operators to the
   // register. In these cases doing the default conversion makes nonsense
   // diagnostics.
   // FIXME: Unify the different methods for handling shift operators
@@ -6190,7 +6190,7 @@ ParseStatus ARMAsmParser::parseFPImm(OperandVector &Operands) {
 
   Parser.Lex(); // Eat '#' or '$'.
 
-  // Handle negation, as that still comes through as a separate token.
+  // Handle negation, as that still comes through as a  token.
   bool isNegative = false;
   if (Parser.getTok().is(AsmToken::Minus)) {
     isNegative = true;
@@ -6333,7 +6333,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
       E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
       Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
 
-      // There can be a trailing '!' on operands that we want as a separate
+      // There can be a trailing '!' on operands that we want as a 
       // '!' Token operand. Handle that here. For example, the compatibility
       // alias for 'srsdb sp!, #imm' is 'srsdb #imm!'.
       if (Parser.getTok().is(AsmToken::Exclaim)) {
@@ -7315,7 +7315,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     if (!usedVPTPredicationCode) {
       // If we have a VPT predication code and we haven't just turned it
       // into an operand, then it was a mistake for splitMnemonic to
-      // separate it from the rest of the mnemonic in the first place,
+      //  it from the rest of the mnemonic in the first place,
       // and this may lead to wrong disassembly (e.g. scalar floating
       // point VCMPE is actually a different instruction from VCMP, so
       // we mustn't treat them the same). In that situation, glue it

>From b33f00d515b75185a4e3043844dd5bdd2e9ca1bf Mon Sep 17 00:00:00 2001
From: Alfie Richards <alfie.richards at arm.com>
Date: Tue, 19 Mar 2024 11:47:50 +0000
Subject: [PATCH 3/4] Much neater way to achieve same result. Avoids changing
 the diagnostic also.

---
 .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 75 +++++++++----------
 .../ARM/MCTargetDesc/ARMAddressingModes.h     |  2 +-
 llvm/test/MC/ARM/diagnostics.s                |  4 +-
 3 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index cd5193cff2989e..a9bebc846573f4 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -63,6 +63,7 @@
 #include <iterator>
 #include <limits>
 #include <memory>
+#include <optional>
 #include <string>
 #include <utility>
 #include <vector>
@@ -457,6 +458,7 @@ class ARMAsmParser : public MCTargetAsmParser {
   int tryParseRegister(bool AllowOutofBoundReg = false);
   bool tryParseRegisterWithWriteBack(OperandVector &);
   int tryParseShiftRegister(OperandVector &);
+  std::optional<ARM_AM::ShiftOpc> tryParseShiftToken();
   bool parseRegisterList(OperandVector &, bool EnforceOrder = true,
                          bool AllowRAAC = false,
                          bool AllowOutOfBoundReg = false);
@@ -647,12 +649,12 @@ class ARMAsmParser : public MCTargetAsmParser {
   ParseStatus parseProcIFlagsOperand(OperandVector &);
   ParseStatus parseMSRMaskOperand(OperandVector &);
   ParseStatus parseBankedRegOperand(OperandVector &);
-  ParseStatus parsePKHImm(OperandVector &O, StringRef Op, int Low, int High);
+  ParseStatus parsePKHImm(OperandVector &O, ARM_AM::ShiftOpc, int Low, int High);
   ParseStatus parsePKHLSLImm(OperandVector &O) {
-    return parsePKHImm(O, "lsl", 0, 31);
+    return parsePKHImm(O, ARM_AM::lsl, 0, 31);
   }
   ParseStatus parsePKHASRImm(OperandVector &O) {
-    return parsePKHImm(O, "asr", 1, 32);
+    return parsePKHImm(O, ARM_AM::asr, 1, 32);
   }
   ParseStatus parseSetEndImm(OperandVector &);
   ParseStatus parseShifterImm(OperandVector &);
@@ -4228,30 +4230,36 @@ int ARMAsmParser::tryParseRegister(bool AllowOutOfBoundReg) {
   return RegNum;
 }
 
-// Try to parse a shifter  (e.g., "lsl <amt>"). On success, return 0.
-// If a recoverable error occurs, return 1. If an irrecoverable error
-// occurs, return -1. An irrecoverable error is one where tokens have been
-// consumed in the process of trying to parse the shifter (i.e., when it is
-// indeed a shifter operand, but malformed).
-int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
+std::optional<ARM_AM::ShiftOpc> ARMAsmParser::tryParseShiftToken() {
   MCAsmParser &Parser = getParser();
-  SMLoc S = Parser.getTok().getLoc();
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
-    return -1;
-
+    return std::nullopt;
+  
   std::string lowerCase = Tok.getString().lower();
-  ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+  return StringSwitch<std::optional<ARM_AM::ShiftOpc>>(lowerCase)
       .Case("asl", ARM_AM::lsl)
       .Case("lsl", ARM_AM::lsl)
       .Case("lsr", ARM_AM::lsr)
       .Case("asr", ARM_AM::asr)
       .Case("ror", ARM_AM::ror)
       .Case("rrx", ARM_AM::rrx)
-      .Default(ARM_AM::no_shift);
+      .Default(std::nullopt);
+}
+
+// Try to parse a shifter  (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
+  MCAsmParser &Parser = getParser();
+  SMLoc S = Parser.getTok().getLoc();
 
-  if (ShiftTy == ARM_AM::no_shift)
+  auto ShiftTyOpt = tryParseShiftToken();
+  if (ShiftTyOpt == std::nullopt)
     return 1;
+  auto ShiftTy = ShiftTyOpt.value();
 
   Parser.Lex(); // Eat the operator.
 
@@ -4318,22 +4326,6 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
     }
   }
 
-  auto MnemonicOp = static_cast<ARMOperand &>(*Operands[0]);
-  // These instructions handle shift operands as separate operators to the
-  // register. In these cases doing the default conversion makes nonsense
-  // diagnostics.
-  // FIXME: Unify the different methods for handling shift operators
-  // and use TableGen matching mechanisms to do the validation.
-  static const DenseSet<StringRef> NoDefaultConvertSet{"pkhbt", "pkhtb"};
-  bool ShouldDefaultConvert =
-      (MnemonicOp.isToken() &&
-       !NoDefaultConvertSet.contains(MnemonicOp.getToken()));
-  if (!ShouldDefaultConvert) {
-    // If we get to this point the parsing for the shift operator in
-    // parsePKHLSLImm has failed. So we can generate a diagnostic here.
-    Error(S, "shift operator is malformed for this instruction");
-    return -1;
-  }
 
   if (ShiftReg && ShiftTy != ARM_AM::rrx)
     Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
@@ -5301,17 +5293,22 @@ ParseStatus ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
   return ParseStatus::Success;
 }
 
-ParseStatus ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op,
+// FIXME: Unify the different methods for handling shift operators
+// and use TableGen matching mechanisms to do the validation rather than
+// separate parsing paths.
+ParseStatus ARMAsmParser::parsePKHImm(OperandVector &Operands, ARM_AM::ShiftOpc Op,
                                       int Low, int High) {
   MCAsmParser &Parser = getParser();
-  const AsmToken &Tok = Parser.getTok();
-  if (Tok.isNot(AsmToken::Identifier))
-    return ParseStatus::NoMatch;
-  StringRef ShiftName = Tok.getString();
-  std::string LowerOp = Op.lower();
-  std::string UpperOp = Op.upper();
-  if (ShiftName != LowerOp && ShiftName != UpperOp)
+  auto ShiftCodeOpt = tryParseShiftToken();
+
+  if (!ShiftCodeOpt.has_value())
     return ParseStatus::NoMatch;
+  auto ShiftCode = ShiftCodeOpt.value();
+
+  // The wrong shift code has been provided. Can error here as has matched the correct operand in this case.
+  if (ShiftCode != Op)
+    return Error(Parser.getTok().getLoc(),  ARM_AM::getShiftOpcStr(Op) + " operand expected.");
+
   Parser.Lex(); // Eat shift type token.
 
   // There must be a '#' and a shift amount.
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 163360c08ffba0..28e5840fdde5b4 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -41,7 +41,7 @@ namespace ARM_AM {
 
   inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
 
-  inline const char *getShiftOpcStr(ShiftOpc Op) {
+  inline const StringRef getShiftOpcStr(ShiftOpc Op) {
     switch (Op) {
     default: llvm_unreachable("Unknown shift opc!");
     case ARM_AM::asr: return "asr";
diff --git a/llvm/test/MC/ARM/diagnostics.s b/llvm/test/MC/ARM/diagnostics.s
index b6d011e5bdef8f..fa23a7da1e4048 100644
--- a/llvm/test/MC/ARM/diagnostics.s
+++ b/llvm/test/MC/ARM/diagnostics.s
@@ -235,10 +235,10 @@
 @ CHECK-ERRORS: error: immediate value out of range
 @ CHECK-ERRORS:         pkhtb r2, r2, r3, asr #33
 @ CHECK-ERRORS:                                ^
-@ CHECK-ERRORS: error: shift operator is malformed for this instruction
+@ CHECK-ERRORS: error: lsl operand expected.
 @ CHECK-ERRORS:         pkhbt r2, r2, r3, asr #3
 @ CHECK-ERRORS:                           ^
-@ CHECK-ERRORS: error: shift operator is malformed for this instruction
+@ CHECK-ERRORS: error: asr operand expected.
 @ CHECK-ERRORS:         pkhtb r2, r2, r3, lsl #3
 @ CHECK-ERRORS:                           ^
 

>From 73eca78c20ba159ba45a614368218c5af98abbaf Mon Sep 17 00:00:00 2001
From: Alfie Richards <alfie.richards at arm.com>
Date: Tue, 19 Mar 2024 11:51:58 +0000
Subject: [PATCH 4/4] Formatting

---
 llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index a9bebc846573f4..f03a2bb0b5c847 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1945,7 +1945,7 @@ class ARMOperand : public MCParsedAsmOperand {
 
       // And be in the right range, depending on the amount that it is shifted
       // by.  Shift 0, is equal to 7 unsigned bits, the sign bit is set
-      // ly.
+      // separately.
       int64_t Range = (1U << (7 + shift)) - 1;
       return (Val == INT32_MIN) || (Val > -Range && Val < Range);
     }
@@ -4235,7 +4235,7 @@ std::optional<ARM_AM::ShiftOpc> ARMAsmParser::tryParseShiftToken() {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
     return std::nullopt;
-  
+
   std::string lowerCase = Tok.getString().lower();
   return StringSwitch<std::optional<ARM_AM::ShiftOpc>>(lowerCase)
       .Case("asl", ARM_AM::lsl)
@@ -4326,7 +4326,6 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
     }
   }
 
-
   if (ShiftReg && ShiftTy != ARM_AM::rrx)
     Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
                                                          ShiftReg, Imm,



More information about the llvm-commits mailing list