<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/76173>76173</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Weird Pass By Reference Issue When Trying To Use the llvm-mc.cpp Assembler Script in My Own Project
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          matinraayai
      </td>
    </tr>
</table>

<pre>
    My goal is to use LLVM MC to parse a simple assembly file in memory (without any directives, most likely a basic block) and generate machine code (preferably only the .text section, since I want to embed it in an ELF file myself). I am targeting AMDGPU.

Naturally I looked at how it's done [here](https://github.com/llvm/llvm-project/blob/7bd17212ef23a72ea224a037126d33d3e02553fe/llvm/tools/llvm-mc/llvm-mc.cpp#L323) in [llvm-mc.cpp](https://github.com/llvm/llvm-project/blob/7bd17212ef23a72ea224a037126d33d3e02553fe/llvm/tools/llvm-mc/llvm-mc.cpp).

I created the following function to use with the AMDGPU target:
```c++
#include <cassert>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"

llvm::SmallVector<char> assemble(const std::string &instListStr) {

    LLVMInitializeAMDGPUTarget();
 LLVMInitializeAMDGPUTargetInfo();
    LLVMInitializeAMDGPUTargetMC();
 LLVMInitializeAMDGPUDisassembler();
 LLVMInitializeAMDGPUAsmParser();
    LLVMInitializeAMDGPUAsmPrinter();
 LLVMInitializeAMDGPUTargetMCA();

    std::string isaName{"amdgcn-amd-amdhsa--gfx908"};
    std::string Error;
 std::cout << "Target name: " << isaName << std::endl;

    const llvm::Target *TheTarget = llvm::TargetRegistry::lookupTarget(isaName, Error);
    assert(TheTarget);

    std::unique_ptr<const llvm::MCRegisterInfo> MRI(TheTarget->createMCRegInfo(llvm::StringRef(isaName)));
 assert(MRI);

    llvm::MCTargetOptions MCOptions;
 std::unique_ptr<const llvm::MCAsmInfo> MAI(
 TheTarget->createMCAsmInfo(*MRI, isaName, MCOptions));

 assert(MAI);

    std::unique_ptr<const llvm::MCInstrInfo> MII(TheTarget->createMCInstrInfo());
    assert(MII);

 std::unique_ptr<const llvm::MCSubtargetInfo> STI(
 TheTarget->createMCSubtargetInfo(isaName, "gfx908", "+sramecc-xnack"));
    assert(STI);

    // MatchAndEmitInstruction in MCTargetAsmParser.h

    // Now that GetTarget() has (potentially) replaced TripleName, it's safe to
    // construct the Triple object.
    llvm::Triple TheTriple(isaName);

//    std::unique_ptr<llvm::MemoryBuffer> BufferPtr = llvm::MemoryBuffer::getMemBuffer(instListStr, "", true);
//
//    llvm::MemoryBuffer *Buffer = BufferPtr.get();

// auto SrcMgr = std::make_unique<llvm::SourceMgr>();
    llvm::SourceMgr SrcMgr;
    // Package up features to be passed to target/subtarget
 std::string FeaturesStr;
    //    if (MAttrs.size()) {
    // SubtargetFeatures Features;
    //        for (unsigned i = 0; i != MAttrs.size(); ++i)
    //            Features.AddFeature(MAttrs[i]);
 //        FeaturesStr = Features.getString();
    //    }

    // std::unique_ptr<llvm::MCContext> Ctx(new (std::nothrow)
    // llvm::MCContext(llvm::Triple(isaName), MAI.get(), MRI.get(),
    // &SrcMgr,
    // &MCOptions,
    // STI.get()));
    //    assert(Ctx);

    // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and
 // MCObjectFileInfo needs a MCContext reference in order to initialize itself.
    llvm::MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr,
                        &MCOptions);
 std::unique_ptr<llvm::MCObjectFileInfo> MOFI(
 TheTarget->createMCObjectFileInfo(Ctx, /*PIC*/ true, /*large code model*/ false));
    Ctx.setObjectFileInfo(MOFI.get());

 Ctx.setAllowTemporaryLabels(false);

 Ctx.setGenDwarfForAssembly(false);

    llvm::SmallVector<char> out;

    llvm::raw_svector_ostream VOS(out);

 std::unique_ptr<llvm::buffer_ostream> BOS;



 std::unique_ptr<llvm::MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
 assert(MCII && "Unable to create instruction info!");

 llvm::MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, Ctx);
 llvm::MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
    std::unique_ptr<llvm::MCStreamer> Str(TheTarget->createMCObjectStreamer(
        TheTriple, Ctx, std::unique_ptr<llvm::MCAsmBackend>(MAB),
 MAB->createObjectWriter(VOS),
 std::unique_ptr<llvm::MCCodeEmitter>(CE), *STI, MCOptions.MCRelaxAll,
 MCOptions.MCIncrementalLinkerCompatible,
 /*DWARFMustBeAtTheEnd*/ false));

//    Str->initSections(true, *STI);

    // Use Assembler information for parsing.
 Str->setUseAssemblerInfoForParsing(false);

    // Tell SrcMgr about this buffer, which is what the parser will pick up.

 assert(llvm::MemoryBuffer::getMemBuffer(instListStr, "", true)->getBuffer() == llvm::StringRef(instListStr));
    unsigned srcId = SrcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBufferCopy(instListStr), llvm::SMLoc());
    SrcMgr.getBufferInfo(srcId);

 std::unique_ptr<llvm::MCAsmParser> Parser(
 llvm::createMCAsmParser(SrcMgr, Ctx, *Str, *MAI));
 std::unique_ptr<llvm::MCTargetAsmParser> TAP(
 TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));

 assert(TAP && "this target does not support assembly parsing.\n");

 //    int SymbolResult = fillCommandLineSymbols(*Parser);
    // if(SymbolResult)
    //        return SymbolResult;
 Parser->setShowParsedOperands(true);
 Parser->setTargetParser(*TAP);
 Parser->getLexer().setLexMasmIntegers(true);
 Parser->getLexer().setLexMasmHexFloats(true);
 Parser->getLexer().setLexMotorolaIntegers(true);

 Parser->Run(false);

    return out;
}
```
I ran this function with the following input: ```instListStr = "s_load_dword s0, s[4:5], 0x4"```, but I get a segfault when creating the ```Parser``` using the ```llvm::createMCAsmParser``` factory method. After running the code against an LLVM build with debug information, the issue turned out to be a failing assertion, which indicates that no sources have been added to ```SrcMgr``` in the first place. However, I can confirm that, before the parser factory function is called, my source buffer has been added and can be accessed from the ```SrcMgr```: the ```SrcMgr.getBufferInfo(srcId);``` check does not fail.

It seems that when ```SrcMgr``` is passed by reference, the underlying ```Buffers``` member is not passed correctly. Inspecting with GDB shows that both the ```SM``` inside the factory function and the ```SrcMgr``` inside my function point to the same region of memory, but the ```Buffers``` field of ```SM``` inside the factory function points to ```nullptr```. I'm not sure why this happens or what is causing it. Any help with this would be greatly appreciated.

Thanks in Advance

</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzMGltzmzz215CXM_U4Is7lIQ-YxP08E7eZOP26bx0BB6ONkFhJ1PH363ckAQabJE5nHzaTpgbOTed-DqZas41AvA1m82B2d0ZrU0h1W1LDhKJ0R9lZIrPd7WoHG0k5MA1GQq0RHh7-XsEqtpcVVRqBgmZlxRGo1lgmfAc54whMQImlVDsIyPWWmULWBqjYQcYUpob9Rh2QGEqpDXD2gnwHFBKqWQoJl-lLQG6Aigw2KFBRg1DStGACIZUZWpqVwhwVtQwteZCC78AUCBODrwa0ZSKF5bGELRXGSoxlghmYgmkrHxVw_7CAkoqacr6DcqeR5wG5mcASaAmGqg0aJjYQre6-Pv6YBNO7YBr5v9-oqZVDWwKX8gUzoAYKuQVmAnKlIZMCIZjNC1QYzO4Ccl0YU-kgjAKyCMhiw0xRJ5NUlgFZcP67_e9LpeS_MTUBWSRcJgFZXCXZ-RU5J5iTkF4RpIRc0Gl4dU4uszDMQpyS2SzMcU_HSMl1S69M958maVUFJHwISWgVzISVsP_s_0vQm4HKl5AqpMZZECGXnMuttU5eC2fr1kWdO1gQb7bGjvY8ntjl1P-mAZnbX3-XhEykvLbOFcbaKCY2QXg_9pBJbRTS8o3H3u1HHxLSnHsVuz-RLuc0fUGRTYqAkJMQliKXJ0LHMsP7khmD6mQMYaPnROg7ppugR3V040QaS6HNo2LidBkthooE5TvN9Gdw7lCnn4H_hKa_JzYSfip2-jEebfZUrVUf8PUPMf3V51GfXVR8lsATbpg2-BndrF2wnMxhXSc-YE9m4U_iJVO7j1DWdVVJZbPWWtYqxdXmQ8n2KJ7VGjmmgyBxfx10GAVhtC4p539jaqQKwjgtqArC-7ZAYkCuUym0AW0yD-_TDQTkkgltHpg2a6Nsgg6u5n0OAOAK8FIwwyhn_6BPcV6sgFwH5CYIG5R3IK1qD6HfJW31_DHxYT74GL5zvhNlsfA-VZx-1FUcHQDveRzqn2n6jZZolU4ILbNNKr7QMrP_Ck2_fNnkrzfTa2v1q7uBsIeE7pWylm8husepbYKCMA7C2LqXlxCE5RlG9k77sJGkvewIoMj4yEG8O-39ryEckOi5wPYivDuCaGPG37NtTF11vtRqg8TNeQ4sZE1tg-K64_GBlmvB_lPjr8q4oDgQeZhabLisnpZ94l-C8N7XfwfauHAv5pzqnzDvi37T_HZyd0I76mPi9kXynL9XtrvQsIqbTyOW_eBsTd12x4rssRr80dO1wNZtIydn5xD2416M4dmOThi9ccJThe6KoBN7-aY19nA-0N7yE0fiSKBTpRnUBSvR-vkjRQ5RBh4dELKPZn8dkLlWtMQ0_fIqqJ1AyHuncezH1Ou7ZlhRkxaRyGwD5jRU-yaVCRgrvuNEvsktmIIa-Iqmn-ShoNoNQdKgsFmP7-xdhRWnKWbwrFjFsT1sM5FomiMYecTE6dqK55pmjwrSdTSTsbhoIKzK3adhwA0V0rB40-t6FnZt87zOc3TV0n96NOogcw3h7B2b5bFs7pDrQQltLOtsbFQ9lNDJdijnOCebS9tP4d1euMlx3R0QpLWRsFbpauMROy2U9AV_eVUM1NA1JXaAOC6LI4AN-QFYw_2Rpi90g1BXkKMdWNHN8QlCZf04sxfNdEQWuo2Ww8BsitqioWD1OsILAFgOLu8Yo_REs3-wSwj7RqaH0YVnS7lj8QZ9-5NLa4vrWrgFRgbMqXUahHP7kZzbq2MJwjn4YY_ZqzdI259WgkmUZc3n7kjBbM7cgNwzyZBCT0VOrI6abRr9SHls0j0J21aM54EPg6ed3GzkxOY1INcCt1ZRHaaQplByO3b-MTr90joW5rYQRcue-9sbT8MbR4wCctk46_jDXmk7fr5-HlA_ysx7PXY52mninRy9WP5rdW9br2e3EtIgpIFK
oTG7CXS6cMmWQmWUjZd22FswjrauABXZ0BmOIARiZinsKbrdFYrU7cmkytCRZl0HC8xo5Plo-t1T8Ybu5eETbHKoxvgNo4z9DC1081Eb1Bd5qBHXUHxffFS_D5Aae8Zez9HjMg6IzbNNYm_vc0vHrwlLmSFvgHLKNY64TWxeJxrNES8r39DfDtyoQYw4l9tnLCupqNo90AS5Dsh1x24c6yuKuy1V-UKqqNmbvoM0TPyjw6WszbtYim5_6d8O61ezv4K_v68Dcm0xT23L9vQSVwF_7VdhMP--PqyBnyF42G_Gf9Zw7rvNeLm0HhuQS9sD_BA04bb9aZaIwAY9maV13nR8B5oYRl63UbMdQXzv0vyokD3QppN3J4rhICkdzgnNQtCSX0Xzt-nvQT1514_GsB8ZxkP1lE6sW9m4Ntu2UeN28DHTAXfB3PwMElMTuh_z7p3LdUCraD6oJqtovpehv3cLyLVz6B7sKWVzbyXHLr7v0mKr0k6REzt5cvoacd6Tp_d0KVKFJQpD-QMTL6hiWVbUMLf0iftVIrr7GT0tVrU2c4zMc4H31o5vJqrDJnVtlFWCLRhr_6LD5px9How-mk5-aISo3dU4_1cldbFgG6yKKs3Epi0_DTeN5ofGDssG4EKqRw_7fvZquD4j5203TBNZG_8uJmla9xi2BUsLW4i3duix44h7yaRgyziHiqUvUFeT8Xn3fzgi2NNu0HTwroMN74aDyGDhMFjbHQZc16xqlS4zF9JeCbbL_IZb38p33E49SCyr3QjvuC_j6kGm42N5I0F3zCafOhH_oB70dtE2a-z3eodJrpe_OqCu_4CuwkedbaJmkfG5huNgwrYyPUePH29eegvJXkpt73p5mkR-6iLmOXrsFSLn8X7ugUyi7zm13zDv36F2ETiLxXhZ6o1dwsB6VyaSP6GuuV_15YzzWJYlFdkDE-ifa3-u9jSj_TOz_twn987EpNDUSgyY70l6Lk3iWBdy625k3ytUVGT7fHXzBoo3T98ezoCj4Bs07g2Kd3XbYD3g64rqcikMblB9wO1N9L_wdcElNX-EL41UktN3RDgi9FSL9xNpo_FBt9dNje2LTX-5BEWFT7DdG9Lu1ej-7SkTlSUWQYfeSyjOlQJC9C8uafYr20qVgZ66Sh7M5hdBGM3cSBzD9PXCOmonA4khqQ0swTo6BY2bnFrn3BYofA9muVtZOpzG1u0l1PoI5L080uHl1La5OyjRFDKbQJTbfk3VQrT03HQghf82g1NKhkm96RdCVw4KBKZ1bf9CLmuRNesTKprwdhWTMu4qYAy68Eo1TflSCFQhCAna5XgNNMv83qUTt0l-nfRMeAsxpQ24fV4QzuEvucXfPgUtIaUCUilypkrHyikbc6kQUsp5e8qmdrbqaL3Afe9iZ4tRU3ndgJsgikY6KjLHwh40TdFtinIly6EpDuVOdpAWmL44p8oP4d4tND3LMa6HL_0NaMRSe5U653lbc7rdayW7_YjdGrIWGSq-cy_aWhQvkR6QcDsATyaVSmFq-G4CS6Er22mJjfeXr3dzZ-1GsEQ2gbUXbtU3qWYZerMeGMMp-129NshlD6eSzH-hxSJqWiIo3NgHMm--ddOG34Dy0WlzhjyzSJ-W2kmgB34sas5tHW6vJ7AMyFXZ1DeFsC12Ph0VtKrQLXnc6V2vxzSktA14picQiR0UyKs2Z9mmUNY8s165sbHPd0CrSmHKqMFs4DTPBRUv7js-UfabWh9wt8-y2zC7CW_oGd6eX03D8-vZRTg7K25vzqfZTX6VhOfTJLwMk2l6mYTTyxkhNLlMk-yM3ZIpCc8JOT-_mYaz2eQ8n5Gc0ourMLzGi_NZcDHFkjI-sQlqItXmzOWN26vL86vwjLutgPuyFSECtz6p2HQ5uztTt-47L0m90cHFlDNt9J6KYYbj7U9kKoNHqjXMd_DULY-WLjf9tEHxrJxnP0vX2lub9b5JY1t93-mvU8Uq415C7OD7VsCj_xLP
Wa347ae_9-OOoQOycMf8bwAAAP__1SySBQ">