[llvm] d97f17a - [MC][ARM] Fix crash when assembling Thumb 'movs r0,#foo'. (#115026)

Thu Nov 14 01:15:02 PST 2024

Author: Simon Tatham
Date: 2024-11-14T09:14:59Z
New Revision: d97f17a95982bab49ecdfb9b45ef3c7d7e3d143e

URL: https://github.com/llvm/llvm-project/commit/d97f17a95982bab49ecdfb9b45ef3c7d7e3d143e
DIFF: https://github.com/llvm/llvm-project/commit/d97f17a95982bab49ecdfb9b45ef3c7d7e3d143e.diff

LOG: [MC][ARM] Fix crash when assembling Thumb 'movs r0,#foo'. (#115026)

If the assembler sees this instruction, understanding `foo` to be an
external symbol, there's no relocation it can write that will put the
whole value of `foo` into the 8-bit immediate field of the 16-bit Thumb
add instruction. So it should report an error message pointing at the
source line, and in LLVM 18, it did exactly that. But now the error is
not reported, due to an indexing error in the operand list in
`validateInstruction`, and instead the code continues to attempt
assembly, ending up aborting at the `llvm_unreachable` at the end of
`getHiLoImmOpValue`.

In this commit I've fixed the index in the `ARM::tMOVi8` case of
`validateInstruction`, and also the one for `tADDi8` which must cope
with either the 2- or 3-operand form in the input assembly source. But
also, while writing the test, I found that if you assemble for Armv7-M
instead of Armv6-M, the instruction has opcode `t2ADDri` when it goes
through `validateInstruction`, and only turns into `tMOVi8` later in
`processInstruction`. Then it's too late for `validateInstruction` to
report that error. So I've adjusted `processInstruction` to spot that
case and inhibit the conversion.

Added: 
    llvm/test/MC/ARM/lower-upper-errors-2.s
    llvm/test/MC/ARM/lower-upper-errors.s

Modified: 
    llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 0df1c336a22146..0dc637fc08aca3 100644

--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8294,7 +8294,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
     break;
   }
   case ARM::tADDi8: {
-    MCParsedAsmOperand &Op = *Operands[MnemonicOpsEndInd + 1];
+    int i = (Operands[MnemonicOpsEndInd + 1]->isImm()) ? MnemonicOpsEndInd + 1
+                                                       : MnemonicOpsEndInd + 2;
+    MCParsedAsmOperand &Op = *Operands[i];
     if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
       return Error(Op.getStartLoc(),
                    "Immediate expression for Thumb adds requires :lower0_7:,"
@@ -8302,7 +8304,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
     break;
   }
   case ARM::tMOVi8: {
-    MCParsedAsmOperand &Op = *Operands[MnemonicOpsEndInd];
+    MCParsedAsmOperand &Op = *Operands[MnemonicOpsEndInd + 1];
     if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
       return Error(Op.getStartLoc(),
                    "Immediate expression for Thumb movs requires :lower0_7:,"
@@ -10709,14 +10711,26 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
     // the flags are compatible with the current IT status, use encoding T2
     // instead of T3. For compatibility with the system 'as'. Make sure the
     // wide encoding wasn't explicit.
-    if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
-        !isARMLowRegister(Inst.getOperand(0).getReg()) ||
-        (Inst.getOperand(2).isImm() &&
-         (unsigned)Inst.getOperand(2).getImm() > 255) ||
-        Inst.getOperand(5).getReg() !=
-            (inITBlock() ? ARM::NoRegister : ARM::CPSR) ||
-        HasWideQualifier)
-      break;
+    if (HasWideQualifier)
+      break; // source code has asked for the 32-bit instruction
+    if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg())
+      break; // tADDi8 can't take 
diff erent input and output registers
+    if (!isARMLowRegister(Inst.getOperand(0).getReg()))
+      break; // high register that tADDi8 can't access
+    if (Inst.getOperand(5).getReg() !=
+        (inITBlock() ? ARM::NoRegister : ARM::CPSR))
+      break; // flag-modification would require overriding the IT state
+    if (Inst.getOperand(2).isImm()) {
+      if ((unsigned)Inst.getOperand(2).getImm() > 255)
+        break; // large immediate that tADDi8 can't contain
+    } else {
+      int i = (Operands[MnemonicOpsEndInd + 1]->isImm())
+                  ? MnemonicOpsEndInd + 1
+                  : MnemonicOpsEndInd + 2;
+      MCParsedAsmOperand &Op = *Operands[i];
+      if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
+        break; // a type of non-immediate that tADDi8 can't represent
+    }
     MCInst TmpInst;
     TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
                       ARM::tADDi8 : ARM::tSUBi8);

diff  --git a/llvm/test/MC/ARM/lower-upper-errors-2.s b/llvm/test/MC/ARM/lower-upper-errors-2.s
new file mode 100644
index 00000000000000..58c1f117923001
--- /dev/null
+++ b/llvm/test/MC/ARM/lower-upper-errors-2.s
@@ -0,0 +1,18 @@
+// RUN: not llvm-mc --triple thumbv7m -filetype=obj -o /dev/null %s 2>&1 | FileCheck %s
+
+// This test checks reporting of errors of the form "you should have
+// used :lower16: in this immediate field", when the errors are
+// discovered at the object-file output stage by checking the set of
+// available relocations.
+//
+// For errors that are reported earlier, when initially reading the
+// instructions, see lower-upper-errors.s.
+
+// CHECK: [[@LINE+1]]:1: error: unsupported relocation
+adds r0, r0, #foo
+
+// CHECK: [[@LINE+1]]:1: error: unsupported relocation
+add r9, r0, #foo
+
+// CHECK: [[@LINE+1]]:1: error: expected relocatable expression
+movs r11, :upper8_15:#foo

diff  --git a/llvm/test/MC/ARM/lower-upper-errors.s b/llvm/test/MC/ARM/lower-upper-errors.s
new file mode 100644
index 00000000000000..d97432b607cef3
--- /dev/null
+++ b/llvm/test/MC/ARM/lower-upper-errors.s
@@ -0,0 +1,51 @@
+// RUN: not llvm-mc --triple thumbv6m %s 2>&1 | FileCheck %s --check-prefixes=CHECK,THUMB1
+// RUN: not llvm-mc --triple thumbv7m %s 2>&1 | FileCheck %s --check-prefixes=CHECK,THUMB2
+
+// This test checks reporting of errors of the form "you should have
+// used :lower16: in this immediate field", during initial reading of
+// the source file.
+//
+// For errors that are reported at object-file output time, see
+// lower-upper-errors-2.s.
+
+// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb movs requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
+movs r0, #foo
+
+// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb adds requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
+adds r0, #foo
+
+// THUMB2: :[[@LINE+1]]:10: error: immediate expression for mov requires :lower16: or :upper16
+movw r0, #foo
+
+// THUMB2: :[[@LINE+1]]:10: error: immediate expression for mov requires :lower16: or :upper16
+movt r0, #foo
+
+// With a Thumb2 wide add instruction available, this case isn't an error
+// while reading the source file. It only causes a problem when an object
+// file is output, and it turns out there's no suitable relocation to ask
+// for the value of an external symbol to be turned into a Thumb shifted
+// immediate field. And in this case the other errors in this source file
+// cause assembly to terminate before reaching the object-file output stage
+// (even if a test run had had -filetype=obj).
+
+// THUMB1: :[[@LINE+2]]:14: error: Immediate expression for Thumb adds requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
+// THUMB2-NOT: :[[@LINE+1]]:{{[0-9]+}}: error:
+adds r0, r0, #foo
+
+// Similarly for this version, which _must_ be the wide encoding due
+// to the use of a high register and the lack of flag-setting.
+
+// THUMB1: :[[@LINE+2]]:1: error: invalid instruction
+// THUMB2-NOT: :[[@LINE+1]]:{{[0-9]+}}: error:
+add r9, r0, #foo
+
+// CHECK: :[[@LINE+1]]:10: error: Immediate expression for Thumb movs requires :lower0_7:, :lower8_15:, :upper0_7: or :upper8_15:
+movs r0, :lower16:#foo
+
+// This is invalid in either architecture: in Thumb1 it can't use a
+// high register, and in Thumb2 it can't use :upper8_15:. But the
+// Thumb2 case won't cause an error until output.
+
+// THUMB1: :[[@LINE+2]]:1: error: invalid instruction
+// THUMB2-NOT: :[[@LINE+1]]:{{[0-9]+}}: error:
+movs r11, :upper8_15:#foo