[llvm] af1c2e5 - [ARM] Fix dropped dollar sign from symbols in branch targets

Lucas Prates via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 6 08:25:22 PST 2020


Author: Lucas Prates
Date: 2020-03-06T16:25:08Z
New Revision: af1c2e561e5fbd584093b1200cf364fe1b7ec7c4

URL: https://github.com/llvm/llvm-project/commit/af1c2e561e5fbd584093b1200cf364fe1b7ec7c4
DIFF: https://github.com/llvm/llvm-project/commit/af1c2e561e5fbd584093b1200cf364fe1b7ec7c4.diff

LOG: [ARM] Fix dropped dollar sign from symbols in branch targets

Summary:
ARMAsmParser was incorrectly dropping a leading dollar sign character
from symbol names in targets of branch instructions. This was caused by
an incorrect assumption that the contents following the dollar sign
token should be handled as a constant immediate, similarly to the #
token.

This patch prevents operand parsing from consuming the dollar sign
token when it is followed by an identifier, so that the token is properly
parsed as part of the expression.

Reviewers: efriedma

Reviewed By: efriedma

Subscribers: danielkiss, chill, carwil, vhscampos, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73176

Added: 
    

Modified: 
    llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
    llvm/test/MC/ARM/arm-branches.s

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 3ffee6804b3c..c1bddfb847d6 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6119,20 +6119,35 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
   case AsmToken::LCurly:
     return parseRegisterList(Operands, !Mnemonic.startswith("clr"));
   case AsmToken::Dollar:
-  case AsmToken::Hash:
-    // #42 -> immediate.
+  case AsmToken::Hash: {
+    // #42 -> immediate
+    // $ 42 -> immediate
+    // $foo -> symbol name
+    // $42 -> symbol name
     S = Parser.getTok().getLoc();
-    Parser.Lex();
+
+    // Favor the interpretation of $-prefixed operands as symbol names.
+    // Cases where immediates are explicitly expected are handled by their
+    // specific ParseMethod implementations.
+    auto AdjacentToken = getLexer().peekTok(/*ShouldSkipSpace=*/false);
+    bool ExpectIdentifier = Parser.getTok().is(AsmToken::Dollar) &&
+                            (AdjacentToken.is(AsmToken::Identifier) ||
+                             AdjacentToken.is(AsmToken::Integer));
+    if (!ExpectIdentifier) {
+      // Token is not part of identifier. Drop leading $ or # before parsing
+      // expression.
+      Parser.Lex();
+    }
 
     if (Parser.getTok().isNot(AsmToken::Colon)) {
-      bool isNegative = Parser.getTok().is(AsmToken::Minus);
+      bool IsNegative = Parser.getTok().is(AsmToken::Minus);
       const MCExpr *ImmVal;
       if (getParser().parseExpression(ImmVal))
         return true;
       const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
       if (CE) {
         int32_t Val = CE->getValue();
-        if (isNegative && Val == 0)
+        if (IsNegative && Val == 0)
           ImmVal = MCConstantExpr::create(std::numeric_limits<int32_t>::min(),
                                           getContext());
       }
@@ -6151,7 +6166,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
     }
     // w/ a ':' after the '#', it's just like a plain ':'.
     LLVM_FALLTHROUGH;
-
+  }
   case AsmToken::Colon: {
     S = Parser.getTok().getLoc();
     // ":lower16:" and ":upper16:" expression prefixes

diff  --git a/llvm/test/MC/ARM/arm-branches.s b/llvm/test/MC/ARM/arm-branches.s
index 4a451114d888..da719a6bcd2e 100644
--- a/llvm/test/MC/ARM/arm-branches.s
+++ b/llvm/test/MC/ARM/arm-branches.s
@@ -13,3 +13,32 @@
 @ CHECK: bl	#4                      @ encoding: [0x01,0x00,0x00,0xeb]
 @ CHECK: beq	#4                      @ encoding: [0x01,0x00,0x00,0x0a]
 @ CHECK: blx	#2                      @ encoding: [0x00,0x00,0x00,0xfb]
+
+@------------------------------------------------------------------------------
+@ Leading '$' on branch targets must not be dropped if part of symbol names
+@------------------------------------------------------------------------------
+
+        .global $foo
+        b $foo
+        bl $foo
+        beq $foo
+        blx $foo
+        b $foo + 4
+
+@ CHECK: b      ($foo)                      @ encoding: [A,A,A,0xea]
+@ CHECK: bl     ($foo)                      @ encoding: [A,A,A,0xeb]
+@ CHECK: beq    ($foo)                      @ encoding: [A,A,A,0x0a]
+@ CHECK: blx    ($foo)                      @ encoding: [A,A,A,0xfa]
+@ CHECK: b      #($foo)+4                   @ encoding: [A,A,A,0xea]
+
+@------------------------------------------------------------------------------
+@ Leading '$' should be allowed to introduce an expression
+@------------------------------------------------------------------------------
+
+        .global bar
+        b $ 4
+        bl $ bar + 4
+        blx $ bar
+@ CHECK: b	    #4                        @ encoding: [0x01,0x00,0x00,0xea]
+@ CHECK: bl     #bar+4                    @ encoding: [A,A,A,0xeb]
+@ CHECK: blx    bar                       @ encoding: [A,A,A,0xfa]


        


More information about the llvm-commits mailing list