[llvm] bed9efe - [MCDisassembler] Disambiguate Size parameter in tryAddingSymbolicOperand()

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Wed May 25 13:44:42 PDT 2022


Author: Maksim Panchenko
Date: 2022-05-25T13:44:32-07:00
New Revision: bed9efed71b954047aa11d5ed02af433dd9971cf

URL: https://github.com/llvm/llvm-project/commit/bed9efed71b954047aa11d5ed02af433dd9971cf
DIFF: https://github.com/llvm/llvm-project/commit/bed9efed71b954047aa11d5ed02af433dd9971cf.diff

LOG: [MCDisassembler] Disambiguate Size parameter in tryAddingSymbolicOperand()

MCSymbolizer::tryAddingSymbolicOperand() overloaded the Size parameter
to specify either the instruction size or the operand size depending on
the architecture. However, for proper symbolic disassembly on X86, we
need to know both sizes, as an instruction can have two operands, and
the instruction size cannot be reliably calculated based on the operand
offset and its size. Hence, split Size into OpSize and InstSize.

For X86, the new interface allows to fix a couple of issues:
  * Correctly adjust the value of PC-relative operands.
  * Set operand size to zero when the operand is specified implicitly.

Differential Revision: https://reviews.llvm.org/D126101

Added: 
    llvm/unittests/MC/X86/CMakeLists.txt
    llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp

Modified: 
    llvm/include/llvm-c/DisassemblerTypes.h
    llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
    llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
    llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
    llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
    llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
    llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
    llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
    llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
    llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
    llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
    llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
    llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
    llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
    llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
    llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
    llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
    llvm/tools/llvm-objdump/MachODump.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h
index 53baaef110335..6999a350ec918 100644
--- a/llvm/include/llvm-c/DisassemblerTypes.h
+++ b/llvm/include/llvm-c/DisassemblerTypes.h
@@ -38,15 +38,15 @@ typedef void *LLVMDisasmContextRef;
  * one operand with symbolic information.  To determine the symbolic operand
  * information for each operand, the bytes for the specific operand in the
  * instruction are specified by the Offset parameter and its byte widith is the
- * size parameter.  For instructions sets with fixed widths and one symbolic
- * operand per instruction, the Offset parameter will be zero and Size parameter
- * will be the instruction width.  The information is returned in TagBuf and is
- * Triple specific with its specific information defined by the value of
- * TagType for that Triple.  If symbolic information is returned the function
- * returns 1, otherwise it returns 0.
+ * OpSize parameter.  For instructions sets with fixed widths and one symbolic
+ * operand per instruction, the Offset parameter will be zero and InstSize
+ * parameter will be the instruction width.  The information is returned in
+ * TagBuf and is Triple specific with its specific information defined by the
+ * value of TagType for that Triple.  If symbolic information is returned the
+ * function * returns 1, otherwise it returns 0.
  */
-typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC,
-                                  uint64_t Offset, uint64_t Size,
+typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset,
+                                  uint64_t OpSize, uint64_t InstSize,
                                   int TagType, void *TagBuf);
 
 /**

diff  --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
index 7060620b6bd4b..de069ff95c2f1 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h
@@ -181,10 +181,9 @@ class MCDisassembler {
 
 public:
   // Helpers around MCSymbolizer
-  bool tryAddingSymbolicOperand(MCInst &Inst,
-                                int64_t Value,
-                                uint64_t Address, bool IsBranch,
-                                uint64_t Offset, uint64_t InstSize) const;
+  bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
+                                bool IsBranch, uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) const;
 
   void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
 

diff  --git a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
index 18aa781de0a3b..8af3bb2296ec1 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h
@@ -46,7 +46,8 @@ class MCExternalSymbolizer : public MCSymbolizer {
 
   bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
                                 int64_t Value, uint64_t Address, bool IsBranch,
-                                uint64_t Offset, uint64_t InstSize) override;
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override;
   void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream,
                                        int64_t Value,
                                        uint64_t Address) override;

diff  --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
index 3e20e13c1b96f..1efb63f1a1425 100644
--- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
+++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h
@@ -63,12 +63,13 @@ class MCSymbolizer {
   /// \param Address   - Load address of the instruction.
   /// \param IsBranch  - Is the instruction a branch?
   /// \param Offset    - Byte offset of the operand inside the inst.
+  /// \param OpSize    - Size of the operand in bytes.
   /// \param InstSize  - Size of the instruction in bytes.
   /// \return Whether a symbolic operand was added.
   virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
                                         int64_t Value, uint64_t Address,
                                         bool IsBranch, uint64_t Offset,
-                                        uint64_t InstSize) = 0;
+                                        uint64_t OpSize, uint64_t InstSize) = 0;
 
   /// Try to add a comment on the PC-relative load.
   /// For instance, in Mach-O, this is used to add annotations to instructions

diff  --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
index a95385fd7e68a..c6035dca4ce19 100644
--- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp
@@ -22,11 +22,12 @@ MCDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
 
 bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
                                               uint64_t Address, bool IsBranch,
-                                              uint64_t Offset,
+                                              uint64_t Offset, uint64_t OpSize,
                                               uint64_t InstSize) const {
   if (Symbolizer)
-    return Symbolizer->tryAddingSymbolicOperand(
-        Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize);
+    return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value,
+                                                Address, IsBranch, Offset,
+                                                OpSize, InstSize);
   return false;
 }
 

diff  --git a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
index 7befef86303cb..e3f4cdd21557e 100644
--- a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp
@@ -31,19 +31,15 @@ class Triple;
 // is found an MCExpr is created with that, else an MCExpr with Value is
 // created. This function returns true if it adds an operand to the MCInst and
 // false otherwise.
-bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
-                                                    raw_ostream &cStream,
-                                                    int64_t Value,
-                                                    uint64_t Address,
-                                                    bool IsBranch,
-                                                    uint64_t Offset,
-                                                    uint64_t InstSize) {
+bool MCExternalSymbolizer::tryAddingSymbolicOperand(
+    MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address,
+    bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
   struct LLVMOpInfo1 SymbolicOp;
   std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
   SymbolicOp.Value = Value;
 
   if (!GetOpInfo ||
-      !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
+      !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) {
     // Clear SymbolicOp.Value from above and also all other fields.
     std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
 
@@ -53,10 +49,10 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
     // that always makes sense to guess.  But in the case of an immediate it is
     // a bit more questionable if it is an address of a symbol or some other
     // reference.  So if the immediate Value comes from a width of 1 byte,
-    // InstSize, we will not guess it is an address of a symbol.  Because in
+    // OpSize, we will not guess it is an address of a symbol.  Because in
     // object files assembled starting at address 0 this usually leads to
     // incorrect symbolication.
-    if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
+    if (!SymbolLookUp || (OpSize == 1 && !IsBranch))
       return false;
 
     uint64_t ReferenceType;

diff  --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index ffbed4c05ef1e..1b65589416c35 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -749,7 +749,7 @@ static DecodeStatus DecodePCRelLabel19(MCInst &Inst, unsigned Imm,
     ImmVal |= ~((1LL << 19) - 1);
 
   if (!Decoder->tryAddingSymbolicOperand(
-          Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 4))
+          Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(ImmVal));
   return Success;
 }
@@ -1030,7 +1030,7 @@ DecodeUnsignedLdStInstruction(MCInst &Inst, uint32_t insn, uint64_t Addr,
   }
 
   DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
-  if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(offset));
   return Success;
 }
@@ -1640,7 +1640,7 @@ static DecodeStatus DecodeAdrInstruction(MCInst &Inst, uint32_t insn,
     imm |= ~((1LL << 21) - 1);
 
   DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
-  if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(imm));
 
   return Success;
@@ -1675,7 +1675,7 @@ static DecodeStatus DecodeAddSubImmShift(MCInst &Inst, uint32_t insn,
     DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
   }
 
-  if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(ImmVal));
   Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
   return Success;
@@ -1690,7 +1690,7 @@ static DecodeStatus DecodeUnconditionalBranch(MCInst &Inst, uint32_t insn,
   if (imm & (1 << (26 - 1)))
     imm |= ~((1LL << 26) - 1);
 
-  if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(imm));
 
   return Success;
@@ -1742,7 +1742,7 @@ static DecodeStatus DecodeTestAndBranch(MCInst &Inst, uint32_t insn,
   else
     DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
   Inst.addOperand(MCOperand::createImm(bit));
-  if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
+  if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4))
     Inst.addOperand(MCOperand::createImm(dst));
 
   return Success;

diff  --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 5b6f06f8dbb40..11964b2075e5e 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -60,7 +60,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
 /// an operand to the MCInst and Fail otherwise.
 bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
     MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
-    bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+    bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
   if (!SymbolLookUp)
     return false;
   // FIXME: This method shares a lot of code with
@@ -73,8 +73,8 @@ bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
   SymbolicOp.Value = Value;
   uint64_t ReferenceType;
   const char *ReferenceName;
-  if (!GetOpInfo ||
-      !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+  if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
+                               1, &SymbolicOp)) {
     if (IsBranch) {
       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
       const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,

diff  --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
index dc72331660ccd..ca677db49739d 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -29,7 +29,8 @@ class AArch64ExternalSymbolizer : public MCExternalSymbolizer {
 
   bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
                                 int64_t Value, uint64_t Address, bool IsBranch,
-                                uint64_t Offset, uint64_t InstSize) override;
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override;
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index bd75272859695..4d5c6d1875860 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -82,7 +82,7 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
   APInt SignedOffset(18, Imm * 4, true);
   int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
 
-  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
+  if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
     return MCDisassembler::Success;
   return addOperand(Inst, MCOperand::createImm(Imm));
 }
@@ -1918,10 +1918,10 @@ AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
 //===----------------------------------------------------------------------===//
 
 // Try to find symbol name for specified label
-bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
-                                raw_ostream &/*cStream*/, int64_t Value,
-                                uint64_t /*Address*/, bool IsBranch,
-                                uint64_t /*Offset*/, uint64_t /*InstSize*/) {
+bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
+    MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
+    uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
+    uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
 
   if (!IsBranch) {
     return false;

diff  --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index f46ad9d422095..5db64843176ca 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -217,8 +217,8 @@ class AMDGPUSymbolizer : public MCSymbolizer {
                    : MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {}
 
   bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
-                                int64_t Value, uint64_t Address,
-                                bool IsBranch, uint64_t Offset,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t OpSize,
                                 uint64_t InstSize) override;
 
   void tryAddingPcLoadReferenceComment(raw_ostream &cStream,

diff  --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
index 9e59e9d792854..6181017559045 100644
--- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
+++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
@@ -181,7 +181,7 @@ static bool DecodeSymbolicOperand(MCInst &Inst, uint64_t Address,
                                   const MCDisassembler *Decoder) {
   static const uint64_t AtLeast = 2;
   return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand(
-                                    Inst, Value, Address, true, 0, AtLeast));
+                                    Inst, Value, Address, true, 0, AtLeast, 0));
 }
 
 static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address,

diff  --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8bfa70c160893..9acd49292268d 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -835,7 +835,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
                                      const MCDisassembler *Decoder) {
   // FIXME: Does it make sense for value to be negative?
   return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
-                                           isBranch, /* Offset */ 0, InstSize);
+                                           isBranch, /*Offset=*/0, /*OpSize=*/0,
+                                           InstSize);
 }
 
 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being

diff  --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 58c51e76b8965..58d5df4c1f716 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -768,7 +768,8 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
     Bits = 15;
   uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits));
   uint32_t Extended = FullValue + Address;
-  if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4))
+  if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0,
+                                             4))
     HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
   return MCDisassembler::Success;
 }

diff  --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index d91670bff9621..e9fecef4ac5b9 100644
--- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -215,7 +215,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool IsBranch,
                                      uint64_t Width, MCInst &MI,
                                      const MCDisassembler *Decoder) {
   return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
-                                           Width);
+                                           Width, /*InstSize=*/0);
 }
 
 static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address,

diff  --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index da4d92713ef73..1825b95dd6ac5 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -509,7 +509,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
                                      uint64_t Width, MCInst &MI,
                                      const MCDisassembler *Decoder) {
   return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
-                                           Width);
+                                           Width, /*InstSize=*/4);
 }
 
 static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address,

diff  --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 94f0f10868744..979141a1962a0 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -75,7 +75,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
                                      uint64_t Width, MCInst &MI,
                                      const MCDisassembler *Decoder) {
   return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
-                                           Width);
+                                           Width, /*InstSize=*/0);
 }
 
 static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,

diff  --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 8ca5c4830cba1..e8b9ee60233d3 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1874,8 +1874,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
   uint64_t pcrel = 0;
   if (type == TYPE_REL) {
     isBranch = true;
-    pcrel = insn.startLocation +
-            insn.immediateOffset + insn.immediateSize;
+    pcrel = insn.startLocation + insn.length;
     switch (operand.encoding) {
     default:
       break;
@@ -1950,9 +1949,9 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
     break;
   }
 
-  if (!Dis->tryAddingSymbolicOperand(mcInst, immediate + pcrel,
-                                     insn.startLocation, isBranch,
-                                     insn.immediateOffset, insn.immediateSize))
+  if (!Dis->tryAddingSymbolicOperand(
+          mcInst, immediate + pcrel, insn.startLocation, isBranch,
+          insn.immediateOffset, insn.immediateSize, insn.length))
     mcInst.addOperand(MCOperand::createImm(immediate));
 
   if (type == TYPE_MOFFS) {
@@ -2089,8 +2088,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
         return true;
       }
       if (insn.mode == MODE_64BIT){
-        pcrel = insn.startLocation +
-                insn.displacementOffset + insn.displacementSize;
+        pcrel = insn.startLocation + insn.length;
         Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
                                              insn.startLocation +
                                                  insn.displacementOffset);
@@ -2153,9 +2151,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
   mcInst.addOperand(baseReg);
   mcInst.addOperand(scaleAmount);
   mcInst.addOperand(indexReg);
+
+  const uint8_t dispSize =
+      (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
+
   if (!Dis->tryAddingSymbolicOperand(
           mcInst, insn.displacement + pcrel, insn.startLocation, false,
-          insn.displacementOffset, insn.displacementSize))
+          insn.displacementOffset, dispSize, insn.length))
     mcInst.addOperand(displacement);
   mcInst.addOperand(segmentReg);
   return false;

diff  --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index c8c31d2e86638..60c34158941ba 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -2608,7 +2608,8 @@ struct DisassembleInfo {
 // value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
 // information is returned then this function returns 1 else it returns 0.
 static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
-                               uint64_t Size, int TagType, void *TagBuf) {
+                               uint64_t OpSize, uint64_t InstSize, int TagType,
+                               void *TagBuf) {
   struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
   struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
   uint64_t value = op_info->Value;
@@ -2625,7 +2626,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
 
   unsigned int Arch = info->O->getArch();
   if (Arch == Triple::x86) {
-    if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+    if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
       return 0;
     if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
       // TODO:
@@ -2705,7 +2706,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     return 0;
   }
   if (Arch == Triple::x86_64) {
-    if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+    if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
       return 0;
     // For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external
     // relocation entries of a linked image (if any) for an entry that matches
@@ -2737,7 +2738,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
         // adds the Pc.  But for x86_64 external relocation entries the Value
         // is the offset from the external symbol.
         if (info->O->getAnyRelocationPCRel(RE))
-          op_info->Value -= Pc + Offset + Size;
+          op_info->Value -= Pc + InstSize;
         const char *name =
             unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
         op_info->AddSymbol.Present = 1;
@@ -2775,7 +2776,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
       // adds the Pc.  But for x86_64 external relocation entries the Value
       // is the offset from the external symbol.
       if (info->O->getAnyRelocationPCRel(RE))
-        op_info->Value -= Pc + Offset + Size;
+        op_info->Value -= Pc + InstSize;
       const char *name =
           unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
       unsigned Type = info->O->getAnyRelocationType(RE);
@@ -2803,7 +2804,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     return 0;
   }
   if (Arch == Triple::arm) {
-    if (Offset != 0 || (Size != 4 && Size != 2))
+    if (Offset != 0 || (InstSize != 4 && InstSize != 2))
       return 0;
     if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
       // TODO:
@@ -2940,7 +2941,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
     return 1;
   }
   if (Arch == Triple::aarch64) {
-    if (Offset != 0 || Size != 4)
+    if (Offset != 0 || InstSize != 4)
       return 0;
     if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
       // TODO:

diff  --git a/llvm/unittests/MC/X86/CMakeLists.txt b/llvm/unittests/MC/X86/CMakeLists.txt
new file mode 100644
index 0000000000000..212e6e469f302
--- /dev/null
+++ b/llvm/unittests/MC/X86/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(LLVM_LINK_COMPONENTS
+  MC
+  MCDisassembler
+  Target
+  X86Desc
+  X86Disassembler
+  X86Info
+  )
+
+add_llvm_unittest(X86MCTests
+  X86MCDisassemblerTest.cpp
+  )

diff  --git a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
new file mode 100644
index 0000000000000..97e717cc9b979
--- /dev/null
+++ b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
@@ -0,0 +1,145 @@
+//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+struct Context {
+  const char *TripleName = "x86_64-unknown-elf";
+  std::unique_ptr<MCRegisterInfo> MRI;
+  std::unique_ptr<MCAsmInfo> MAI;
+  std::unique_ptr<MCContext> Ctx;
+  std::unique_ptr<MCSubtargetInfo> STI;
+  std::unique_ptr<MCDisassembler> DisAsm;
+
+  Context() {
+    LLVMInitializeX86TargetInfo();
+    LLVMInitializeX86TargetMC();
+    LLVMInitializeX86Disassembler();
+
+    // If we didn't build x86, do not run the test.
+    std::string Error;
+    const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+    if (!TheTarget)
+      return;
+
+    MRI.reset(TheTarget->createMCRegInfo(TripleName));
+    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
+    STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+    Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
+                                      STI.get());
+
+    DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
+  }
+
+  operator MCContext &() { return *Ctx; };
+};
+
+Context &getContext() {
+  static Context Ctxt;
+  return Ctxt;
+}
+
+class X86MCSymbolizerTest : public MCSymbolizer {
+public:
+  X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
+  ~X86MCSymbolizerTest() {}
+
+  struct OpInfo {
+    int64_t Value = 0;
+    uint64_t Offset = 0;
+    uint64_t Size;
+  };
+  std::vector<OpInfo> Operands;
+  uint64_t InstructionSize = 0;
+
+  void reset() {
+    Operands.clear();
+    InstructionSize = 0;
+  }
+
+  bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override {
+    Operands.push_back({Value, Offset, OpSize});
+    InstructionSize = InstSize;
+    return false;
+  }
+
+  void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+                                       uint64_t Address) override {}
+};
+
+} // namespace
+
+TEST(X86Disassembler, X86MCSymbolizerTest) {
+  X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext());
+  getContext().DisAsm->setSymbolizer(
+      std::unique_ptr<MCSymbolizer>(TestSymbolizer));
+
+  MCDisassembler::DecodeStatus Status;
+  MCInst Inst;
+  uint64_t InstSize;
+
+  auto checkBytes = [&](ArrayRef<uint8_t> Bytes) {
+    TestSymbolizer->reset();
+    Status =
+        getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls());
+    ASSERT_TRUE(Status == MCDisassembler::Success);
+    EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize);
+  };
+
+  auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset,
+                          uint64_t Size) {
+    ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Size, Size);
+  };
+
+  // movq    $0x80000, 0x80000
+  checkBytes(
+      {0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00});
+  checkOperand(0, 0x80000, 4, 4);
+  checkOperand(1, 0x80000, 8, 4);
+
+  // movq   $0x2a, 0x123(%rax,%r14,8)
+  checkBytes(
+      {0x4a, 0xc7, 0x84, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00});
+  checkOperand(0, 291, 4, 4);
+  checkOperand(1, 42, 8, 4);
+
+  // movq   $0xffffffffffffefe8, -0x1(%rip)
+  // Test that the value of the rip-relative operand is set correctly.
+  // The instruction address is 0 and the size is 12 bytes.
+  checkBytes(
+      {0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff});
+  checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3, 4);
+  checkOperand(1, 0xffffffffffffefe8, 7, 4);
+
+  // movq   $0xfffffffffffffef5, (%r12)
+  // Test that the displacement operand has a size of 0, since it is not
+  // explicitly specified in the instruction.
+  checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff});
+  checkOperand(0, 0, 4, 0);
+  checkOperand(1, 0xfffffffffffffef5, 4, 4);
+}


        


More information about the llvm-commits mailing list