<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/76173>76173</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Weird Pass By Reference Issue When Trying To Use the llvm-mc.cpp Assembler Script in My Own Project
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
matinraayai
</td>
</tr>
</table>
<pre>
My goal is to use LLVM MC to parse a simple assembly file in memory (without any directives, most likely a basic block) and generate machine code (preferably with only the .text section, I want to embed this in an ELF manually myself). I am targeting AMDGPU.
Naturally I looked at how it's done [here](https://github.com/llvm/llvm-project/blob/7bd17212ef23a72ea224a037126d33d3e02553fe/llvm/tools/llvm-mc/llvm-mc.cpp#L323) in [llvm-mc.cpp](https://github.com/llvm/llvm-project/blob/7bd17212ef23a72ea224a037126d33d3e02553fe/llvm/tools/llvm-mc/llvm-mc.cpp).
I created the following function to use with the AMDGPU target:
```c++
#include <cassert>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/raw_ostream.h"
/// Assembles in-memory AMDGPU (gfx908) assembly text with the LLVM MC layer.
///
/// The setup follows the assembler path of llvm/tools/llvm-mc/llvm-mc.cpp:
/// target lookup, MC info objects, MCContext, an object streamer that writes
/// into a byte vector, and finally the generic + target assembly parsers.
///
/// \param instListStr Assembly source text to parse; it is copied into the
///                    source manager, so the caller's string need not outlive
///                    the call.
/// \returns The bytes written by the MC object streamer. An empty vector is
///          returned (after printing a diagnostic to std::cerr) when the
///          target cannot be found or the parser reports an error.
llvm::SmallVector<char> assemble(const std::string &instListStr) {
  // Register every AMDGPU component the MC pipeline below relies on.
  LLVMInitializeAMDGPUTarget();
  LLVMInitializeAMDGPUTargetInfo();
  LLVMInitializeAMDGPUTargetMC();
  LLVMInitializeAMDGPUDisassembler();
  LLVMInitializeAMDGPUAsmParser();
  LLVMInitializeAMDGPUAsmPrinter();
  LLVMInitializeAMDGPUTargetMCA();

  const std::string isaName{"amdgcn-amd-amdhsa--gfx908"};
  // std::endl is deliberate: the flush keeps this line visible even if a
  // later step crashes.
  std::cout << "Target name: " << isaName << std::endl;

  // A failed lookup is reported with the registry's own message instead of
  // being guarded by an assert that disappears in NDEBUG builds.
  std::string Error;
  const llvm::Target *TheTarget =
      llvm::TargetRegistry::lookupTarget(isaName, Error);
  if (!TheTarget) {
    std::cerr << "Unable to look up target '" << isaName << "': " << Error
              << std::endl;
    return llvm::SmallVector<char>();
  }

  std::unique_ptr<const llvm::MCRegisterInfo> MRI(
      TheTarget->createMCRegInfo(llvm::StringRef(isaName)));
  assert(MRI && "Unable to create register info!");

  llvm::MCTargetOptions MCOptions;
  std::unique_ptr<const llvm::MCAsmInfo> MAI(
      TheTarget->createMCAsmInfo(*MRI, isaName, MCOptions));
  assert(MAI && "Unable to create asm info!");

  // One MCInstrInfo serves both the code emitter and the target asm parser.
  // It is declared before the streamer so that it outlives the emitter.
  std::unique_ptr<llvm::MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
  assert(MCII && "Unable to create instruction info!");

  // Subtarget features are a comma-separated list of '+name' / '-name'
  // entries; without the comma the two features are read as one unknown name.
  std::unique_ptr<const llvm::MCSubtargetInfo> STI(
      TheTarget->createMCSubtargetInfo(isaName, "gfx908", "+sramecc,-xnack"));
  assert(STI && "Unable to create subtarget info!");

  llvm::Triple TheTriple(isaName);
  llvm::SourceMgr SrcMgr;

  // FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and
  // MCObjectFileInfo needs a MCContext reference in order to initialize itself.
  llvm::MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr,
                      &MCOptions);
  std::unique_ptr<llvm::MCObjectFileInfo> MOFI(
      TheTarget->createMCObjectFileInfo(Ctx, /*PIC*/ true,
                                        /*large code model*/ false));
  Ctx.setObjectFileInfo(MOFI.get());
  Ctx.setAllowTemporaryLabels(false);
  Ctx.setGenDwarfForAssembly(false);

  // Everything the object writer emits lands in `out` through VOS.
  llvm::SmallVector<char> out;
  llvm::raw_svector_ostream VOS(out);

  // Hold the emitter, backend and writer in owning pointers from the moment
  // they are created; ownership is handed to the streamer below.
  std::unique_ptr<llvm::MCCodeEmitter> CE(
      TheTarget->createMCCodeEmitter(*MCII, Ctx));
  assert(CE && "Unable to create code emitter!");
  std::unique_ptr<llvm::MCAsmBackend> MAB(
      TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions));
  assert(MAB && "Unable to create asm backend!");
  // Create the writer before MAB is moved so the call never depends on
  // argument evaluation order.
  std::unique_ptr<llvm::MCObjectWriter> OW = MAB->createObjectWriter(VOS);

  std::unique_ptr<llvm::MCStreamer> Str(TheTarget->createMCObjectStreamer(
      TheTriple, Ctx, std::move(MAB), std::move(OW), std::move(CE), *STI,
      MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
      /*DWARFMustBeAtTheEnd*/ false));
  assert(Str && "Unable to create object streamer!");

  // NOTE(review): assembler information is explicitly NOT used for parsing
  // here (false) — confirm this is intended.
  Str->setUseAssemblerInfoForParsing(false);

  // Tell SrcMgr about this buffer, which is what the parser will pick up.
  unsigned srcId = SrcMgr.AddNewSourceBuffer(
      llvm::MemoryBuffer::getMemBufferCopy(instListStr), llvm::SMLoc());
  // Result intentionally discarded: the call is kept only as a probe that the
  // buffer is reachable through SrcMgr before the parser is constructed.
  SrcMgr.getBufferInfo(srcId);

  std::unique_ptr<llvm::MCAsmParser> Parser(
      llvm::createMCAsmParser(SrcMgr, Ctx, *Str, *MAI));
  std::unique_ptr<llvm::MCTargetAsmParser> TAP(
      TheTarget->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
  assert(TAP && "this target does not support assembly parsing.\n");

  Parser->setShowParsedOperands(true);
  Parser->setTargetParser(*TAP);
  Parser->getLexer().setLexMasmIntegers(true);
  Parser->getLexer().setLexMasmHexFloats(true);
  Parser->getLexer().setLexMotorolaIntegers(true);

  // Run() returns true when parsing failed; do not hand back bytes from a
  // failed assembly.
  if (Parser->Run(false)) {
    std::cerr << "Failed to assemble the given instruction list" << std::endl;
    return llvm::SmallVector<char>();
  }
  return out;
}
```
I ran this function with the following input: ```instListStr = "s_load_dword s0, s[4:5], 0x4"```, but I get a segfault when creating the ```Parser``` using the ```llvm::createMCAsmParser``` factory method. After running the code against an LLVM build with debug information, the crash turned out to be a failing assertion, which reports that no source buffers have been added to ```SrcMgr``` in the first place. However, I can confirm that, before the parser factory function is called, my source buffer has been added and can be accessed through ```SrcMgr```, because the ```SrcMgr.getBufferInfo(srcId);``` call does not fail.
It seems that when ```SrcMgr``` is passed by reference, the underlying ```Buffers``` is not passed correctly. Inspecting with GDB shows that both the ```SM``` inside the factory function and the ```SrcMgr``` inside my function point to the same region of memory, but the ```Buffers``` field of ```SM``` inside the factory function points to ```nullptr```. I'm not sure why this happens, and what is causing this. Any help with this would be greatly appreciated.
Thanks in Advance
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzMGltzmzz215CXM_U4Is7lIQ-YxP08E7eZOP26bx0BB6ONkFhJ1PH363ckAQabJE5nHzaTpgbOTed-DqZas41AvA1m82B2d0ZrU0h1W1LDhKJ0R9lZIrPd7WoHG0k5MA1GQq0RHh7-XsEqtpcVVRqBgmZlxRGo1lgmfAc54whMQImlVDsIyPWWmULWBqjYQcYUpob9Rh2QGEqpDXD2gnwHFBKqWQoJl-lLQG6Aigw2KFBRg1DStGACIZUZWpqVwhwVtQwteZCC78AUCBODrwa0ZSKF5bGELRXGSoxlghmYgmkrHxVw_7CAkoqacr6DcqeR5wG5mcASaAmGqg0aJjYQre6-Pv6YBNO7YBr5v9-oqZVDWwKX8gUzoAYKuQVmAnKlIZMCIZjNC1QYzO4Ccl0YU-kgjAKyCMhiw0xRJ5NUlgFZcP67_e9LpeS_MTUBWSRcJgFZXCXZ-RU5J5iTkF4RpIRc0Gl4dU4uszDMQpyS2SzMcU_HSMl1S69M958maVUFJHwISWgVzISVsP_s_0vQm4HKl5AqpMZZECGXnMuttU5eC2fr1kWdO1gQb7bGjvY8ntjl1P-mAZnbX3-XhEykvLbOFcbaKCY2QXg_9pBJbRTS8o3H3u1HHxLSnHsVuz-RLuc0fUGRTYqAkJMQliKXJ0LHMsP7khmD6mQMYaPnROg7ppugR3V040QaS6HNo2LidBkthooE5TvN9Gdw7lCnn4H_hKa_JzYSfip2-jEebfZUrVUf8PUPMf3V51GfXVR8lsATbpg2-BndrF2wnMxhXSc-YE9m4U_iJVO7j1DWdVVJZbPWWtYqxdXmQ8n2KJ7VGjmmgyBxfx10GAVhtC4p539jaqQKwjgtqArC-7ZAYkCuUym0AW0yD-_TDQTkkgltHpg2a6Nsgg6u5n0OAOAK8FIwwyhn_6BPcV6sgFwH5CYIG5R3IK1qD6HfJW31_DHxYT74GL5zvhNlsfA-VZx-1FUcHQDveRzqn2n6jZZolU4ILbNNKr7QMrP_Ck2_fNnkrzfTa2v1q7uBsIeE7pWylm8husepbYKCMA7C2LqXlxCE5RlG9k77sJGkvewIoMj4yEG8O-39ryEckOi5wPYivDuCaGPG37NtTF11vtRqg8TNeQ4sZE1tg-K64_GBlmvB_lPjr8q4oDgQeZhabLisnpZ94l-C8N7XfwfauHAv5pzqnzDvi37T_HZyd0I76mPi9kXynL9XtrvQsIqbTyOW_eBsTd12x4rssRr80dO1wNZtIydn5xD2416M4dmOThi9ccJThe6KoBN7-aY19nA-0N7yE0fiSKBTpRnUBSvR-vkjRQ5RBh4dELKPZn8dkLlWtMQ0_fIqqJ1AyHuncezH1Ou7ZlhRkxaRyGwD5jRU-yaVCRgrvuNEvsktmIIa-Iqmn-ShoNoNQdKgsFmP7-xdhRWnKWbwrFjFsT1sM5FomiMYecTE6dqK55pmjwrSdTSTsbhoIKzK3adhwA0V0rB40-t6FnZt87zOc3TV0n96NOogcw3h7B2b5bFs7pDrQQltLOtsbFQ9lNDJdijnOCebS9tP4d1euMlx3R0QpLWRsFbpauMROy2U9AV_eVUM1NA1JXaAOC6LI4AN-QFYw_2Rpi90g1BXkKMdWNHN8QlCZf04sxfNdEQWuo2Ww8BsitqioWD1OsILAFgOLu8Yo_REs3-wSwj7RqaH0YVnS7lj8QZ9-5NLa4vrWrgFRgbMqXUahHP7kZzbq2MJwjn4YY_ZqzdI259WgkmUZc3n7kjBbM7cgNwzyZBCT0VOrI6abRr9SHls0j0J21aM54EPg6ed3GzkxOY1INcCt1ZRHaaQplByO3b-MTr90joW5rYQRcue-9sbT8MbR4wCctk46_jDXmk7fr5-HlA_ysx7PXY52mninRy9WP5rdW9br2e3EtIgpIFK
oTG7CXS6cMmWQmWUjZd22FswjrauABXZ0BmOIARiZinsKbrdFYrU7cmkytCRZl0HC8xo5Plo-t1T8Ybu5eETbHKoxvgNo4z9DC1081Eb1Bd5qBHXUHxffFS_D5Aae8Zez9HjMg6IzbNNYm_vc0vHrwlLmSFvgHLKNY64TWxeJxrNES8r39DfDtyoQYw4l9tnLCupqNo90AS5Dsh1x24c6yuKuy1V-UKqqNmbvoM0TPyjw6WszbtYim5_6d8O61ezv4K_v68Dcm0xT23L9vQSVwF_7VdhMP--PqyBnyF42G_Gf9Zw7rvNeLm0HhuQS9sD_BA04bb9aZaIwAY9maV13nR8B5oYRl63UbMdQXzv0vyokD3QppN3J4rhICkdzgnNQtCSX0Xzt-nvQT1514_GsB8ZxkP1lE6sW9m4Ntu2UeN28DHTAXfB3PwMElMTuh_z7p3LdUCraD6oJqtovpehv3cLyLVz6B7sKWVzbyXHLr7v0mKr0k6REzt5cvoacd6Tp_d0KVKFJQpD-QMTL6hiWVbUMLf0iftVIrr7GT0tVrU2c4zMc4H31o5vJqrDJnVtlFWCLRhr_6LD5px9How-mk5-aISo3dU4_1cldbFgG6yKKs3Epi0_DTeN5ofGDssG4EKqRw_7fvZquD4j5203TBNZG_8uJmla9xi2BUsLW4i3duix44h7yaRgyziHiqUvUFeT8Xn3fzgi2NNu0HTwroMN74aDyGDhMFjbHQZc16xqlS4zF9JeCbbL_IZb38p33E49SCyr3QjvuC_j6kGm42N5I0F3zCafOhH_oB70dtE2a-z3eodJrpe_OqCu_4CuwkedbaJmkfG5huNgwrYyPUePH29eegvJXkpt73p5mkR-6iLmOXrsFSLn8X7ugUyi7zm13zDv36F2ETiLxXhZ6o1dwsB6VyaSP6GuuV_15YzzWJYlFdkDE-ifa3-u9jSj_TOz_twn987EpNDUSgyY70l6Lk3iWBdy625k3ytUVGT7fHXzBoo3T98ezoCj4Bs07g2Kd3XbYD3g64rqcikMblB9wO1N9L_wdcElNX-EL41UktN3RDgi9FSL9xNpo_FBt9dNje2LTX-5BEWFT7DdG9Lu1ej-7SkTlSUWQYfeSyjOlQJC9C8uafYr20qVgZ66Sh7M5hdBGM3cSBzD9PXCOmonA4khqQ0swTo6BY2bnFrn3BYofA9muVtZOpzG1u0l1PoI5L080uHl1La5OyjRFDKbQJTbfk3VQrT03HQghf82g1NKhkm96RdCVw4KBKZ1bf9CLmuRNesTKprwdhWTMu4qYAy68Eo1TflSCFQhCAna5XgNNMv83qUTt0l-nfRMeAsxpQ24fV4QzuEvucXfPgUtIaUCUilypkrHyikbc6kQUsp5e8qmdrbqaL3Afe9iZ4tRU3ndgJsgikY6KjLHwh40TdFtinIly6EpDuVOdpAWmL44p8oP4d4tND3LMa6HL_0NaMRSe5U653lbc7rdayW7_YjdGrIWGSq-cy_aWhQvkR6QcDsATyaVSmFq-G4CS6Er22mJjfeXr3dzZ-1GsEQ2gbUXbtU3qWYZerMeGMMp-129NshlD6eSzH-hxSJqWiIo3NgHMm--ddOG34Dy0WlzhjyzSJ-W2kmgB34sas5tHW6vJ7AMyFXZ1DeFsC12Ph0VtKrQLXnc6V2vxzSktA14picQiR0UyKs2Z9mmUNY8s165sbHPd0CrSmHKqMFs4DTPBRUv7js-UfabWh9wt8-y2zC7CW_oGd6eX03D8-vZRTg7K25vzqfZTX6VhOfTJLwMk2l6mYTTyxkhNLlMk-yM3ZIpCc8JOT-_mYaz2eQ8n5Gc0ourMLzGi_NZcDHFkjI-sQlqItXmzOWN26vL86vwjLutgPuyFSECtz6p2HQ5uztTt-47L0m90cHFlDNt9J6KYYbj7U9kKoNHqjXMd_DULY-WLjf9tEHxrJxnP0vX2lub9b5JY1t93-mvU8Uq415C7OD7VsCj_xLP
Wa347ae_9-OOoQOycMf8bwAAAP__1SySBQ">