[clang] [llvm] [inlineasm] Add special support for "rm" inline asm constraints (PR #181973)
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 18 07:22:10 PDT 2026
================
@@ -1,171 +1,425 @@
-//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===//
+//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's
-// codegen.
+// This pass lowers callbrs and inline asm in LLVM IR in order to assist
+// SelectionDAG's codegen.
//
-// In particular, this pass assists in inserting register copies for the output
-// values of a callbr along the edges leading to the indirect target blocks.
-// Though the output SSA value is defined by the callbr instruction itself in
-// the IR representation, the value cannot be copied to the appropriate virtual
-// registers prior to jumping to an indirect label, since the jump occurs
-// within the user-provided assembly blob.
+// CallBrInst:
//
-// Instead, those copies must occur separately at the beginning of each
-// indirect target. That requires that we create a separate SSA definition in
-// each of them (via llvm.callbr.landingpad), and may require splitting
-// critical edges so we have a location to place the intrinsic. Finally, we
-// remap users of the original callbr output SSA value to instead point to the
-// appropriate llvm.callbr.landingpad value.
+// - Assists in inserting register copies for the output values of a callbr
+// along the edges leading to the indirect target blocks. Though the output
+// SSA value is defined by the callbr instruction itself in the IR
+// representation, the value cannot be copied to the appropriate virtual
+// registers prior to jumping to an indirect label, since the jump occurs
+// within the user-provided assembly blob.
//
-// Ideally, this could be done inside SelectionDAG, or in the
-// MachineInstruction representation, without the use of an IR-level intrinsic.
-// But, within the current framework, it’s simpler to implement as an IR pass.
-// (If support for callbr in GlobalISel is implemented, it’s worth considering
-// whether this is still required.)
+// Instead, those copies must occur separately at the beginning of each
+// indirect target. That requires that we create a separate SSA definition
+// in each of them (via llvm.callbr.landingpad), and may require splitting
+// critical edges so we have a location to place the intrinsic. Finally, we
+// remap users of the original callbr output SSA value to instead point to
+// the appropriate llvm.callbr.landingpad value.
+//
+// Ideally, this could be done inside SelectionDAG, or in the
+// MachineInstruction representation, without the use of an IR-level
+// intrinsic. But, within the current framework, it’s simpler to implement
+// as an IR pass. (If support for callbr in GlobalISel is implemented,
+// it’s worth considering whether this is still required.)
+//
+// InlineAsm:
+//
+// - Prepares inline assembly for code generation with the fast register
+//     allocator. In particular, it makes "rm" (register-or-memory)
+//     constraints default to the "m" form (the front-end opts for the "r"
+//     constraint), simplifying register allocation by forcing operands into
+//     memory locations.
+// The other register allocators are equipped to handle folding registers
+//     already, so they don't need to change the default.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/InlineAsmPrepare.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
#define DEBUG_TYPE "inline-asm-prepare"
-static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
-static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
- DominatorTree &DT);
-static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
- SSAUpdater &SSAUpdate);
-static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F);
-
namespace {
class InlineAsmPrepare : public FunctionPass {
public:
InlineAsmPrepare() : FunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
bool runOnFunction(Function &F) override;
+
static char ID;
};
-} // end anonymous namespace
+char InlineAsmPrepare::ID = 0;
-PreservedAnalyses InlineAsmPreparePass::run(Function &F,
- FunctionAnalysisManager &FAM) {
- bool Changed = false;
- SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F);
+} // end anonymous namespace
- if (CBRs.empty())
- return PreservedAnalyses::all();
+INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+ false, false)
- auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+FunctionPass *llvm::createInlineAsmPreparePass() {
+ return new InlineAsmPrepare();
+}
- Changed |= SplitCriticalEdges(CBRs, DT);
- Changed |= InsertIntrinsicCalls(CBRs, DT);
+//===----------------------------------------------------------------------===//
+// Process InlineAsm instructions
+//===----------------------------------------------------------------------===//
- if (!Changed)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- return PA;
+/// Returns true if the inline asm constraint allows both register and memory.
+static bool IsRegMemConstraint(StringRef Constraint) {
+ return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr");
}
-char InlineAsmPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare",
- "Prepare inline asm insts", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare",
- "Prepare inline asm insts", false, false)
+/// Tag "rm" output constraints with '*' to signify that they default to a
+/// memory location.
+static std::pair<std::string, bool>
+ConvertConstraintsToMemory(StringRef ConstraintStr) {
+ auto I = ConstraintStr.begin(), E = ConstraintStr.end();
+ std::string Out;
+ raw_string_ostream O(Out);
+ bool HasRegMem = false;
+
+ while (I != E) {
+ bool IsOutput = false;
+ bool HasIndirect = false;
+ if (*I == '=') {
+ O << *I;
+ IsOutput = true;
+ ++I;
+ if (I == E)
+ return {};
+ }
+ if (*I == '*') {
+ O << '*';
+ HasIndirect = true;
+ ++I;
+ if (I == E)
+ return {};
+ }
+ if (*I == '+') {
+ O << '+';
+ IsOutput = true;
+ ++I;
+ if (I == E)
+ return {};
+ }
-FunctionPass *llvm::createInlineAsmPreparePass() {
- return new InlineAsmPrepare();
+ auto Comma = std::find(I, E, ',');
+ std::string Sub(I, Comma);
+ if (IsRegMemConstraint(Sub)) {
+ HasRegMem = true;
+ if (IsOutput && !HasIndirect)
+ O << '*';
+ }
+
+ O << Sub;
+
+ if (Comma == E)
+ break;
+
+ O << ',';
+ I = Comma + 1;
+ }
+
+ return {Out, HasRegMem};
}
-void InlineAsmPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTreeWrapperPass>();
+/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an
+/// input tied to output constraint j.
+static void
+BuildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints,
+ SmallVectorImpl<int> &TiedOutput) {
+ for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
+ const InlineAsm::ConstraintInfo &C = Constraints[I];
+ if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) {
+ int InputIdx = C.MatchingInput;
+ if (InputIdx >= 0 && InputIdx < (int)Constraints.size())
+ TiedOutput[InputIdx] = I;
+ }
+
+ if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) {
+ int OutputIdx = C.MatchingInput;
+ if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size())
+ TiedOutput[I] = OutputIdx;
+ }
+ }
}
-SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) {
- SmallVector<CallBrInst *, 2> CBRs;
- for (BasicBlock &BB : F)
- if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
- if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
- CBRs.push_back(CBR);
- return CBRs;
+/// Process an output constraint, creating allocas for converted constraints.
+static void ProcessOutputConstraint(
+ const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx,
+ IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs,
+ SmallVectorImpl<Type *> &NewArgTypes, SmallVectorImpl<Type *> &NewRetTypes,
+ SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
+ SmallVectorImpl<AllocaInst *> &OutputAllocas, unsigned ConstraintIdx) {
+ Type *SlotTy = RetTy;
+ if (StructType *ST = dyn_cast<StructType>(RetTy))
+ SlotTy = ST->getElementType(OutputIdx);
+
+ if (C.hasRegMemConstraints()) {
+ // Converted to memory constraint. Create alloca and pass pointer as
+ // argument.
+ AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem");
+ NewArgs.push_back(Slot);
+ NewArgTypes.push_back(Slot->getType());
+ ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy});
+ OutputAllocas[ConstraintIdx] = Slot;
+ // No return value for this output since it's now an out-parameter.
+ } else {
+ // Unchanged, still an output return value.
+ NewRetTypes.push_back(SlotTy);
+ }
}
-bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
- bool Changed = false;
- CriticalEdgeSplittingOptions Options(&DT);
- Options.setMergeIdenticalEdges();
+/// Process an input constraint, handling tied constraints and conversions.
+static void ProcessInputConstraint(const InlineAsm::ConstraintInfo &C,
+ Value *ArgVal, ArrayRef<int> TiedOutput,
+ ArrayRef<AllocaInst *> OutputAllocas,
+ unsigned ConstraintIdx, IRBuilder<> &Builder,
+ IRBuilder<> &EntryBuilder,
+ SmallVectorImpl<Value *> &NewArgs,
+ SmallVectorImpl<Type *> &NewArgTypes) {
+ Type *ArgTy = ArgVal->getType();
+
+ if (TiedOutput[ConstraintIdx] != -1) {
+ int MatchIdx = TiedOutput[ConstraintIdx];
+ if (AllocaInst *Slot = OutputAllocas[MatchIdx]) {
+ // The matched output was converted to memory. Store this input into the
+ // alloca.
+ Builder.CreateStore(ArgVal, Slot);
+
+ // Pass the alloca pointer as the argument, instead of ArgVal. This
+ // ensures the tied "0" constraint matches the "*m" output.
+ NewArgs.push_back(Slot);
+ NewArgTypes.push_back(Slot->getType());
+ return;
+ }
+ }
- // The indirect destination might be duplicated between another parameter...
- // %0 = callbr ... [label %x, label %x]
- // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need
- // to split the default destination if it's duplicated between an indirect
- // destination...
- // %1 = callbr ... to label %x [label %x]
- // ...hence starting at 1 and checking against successor 0 (aka the default
- // destination).
- for (CallBrInst *CBR : CBRs)
- for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
- if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
- isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
- if (SplitKnownCriticalEdge(CBR, i, Options))
- Changed = true;
- return Changed;
+ if (C.hasRegMemConstraints()) {
+ // Converted to memory constraint. Create alloca, store input, pass pointer
+ // as argument.
+ AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem");
+ Builder.CreateStore(ArgVal, Slot);
+ NewArgs.push_back(Slot);
+ NewArgTypes.push_back(Slot->getType());
+ } else {
+ // Unchanged
+ NewArgs.push_back(ArgVal);
+ NewArgTypes.push_back(ArgTy);
+ }
}
-bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
- bool Changed = false;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- IRBuilder<> Builder(CBRs[0]->getContext());
- for (CallBrInst *CBR : CBRs) {
- if (!CBR->getNumIndirectDests())
- continue;
+/// Build the return type from the collected return types.
+static Type *BuildReturnType(ArrayRef<Type *> NewRetTypes,
+ LLVMContext &Context) {
+ if (NewRetTypes.empty())
+ return Type::getVoidTy(Context);
- SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(CBR->getType(), CBR->getName());
- SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
- SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+ if (NewRetTypes.size() == 1)
+ return NewRetTypes[0];
+
+ return StructType::get(Context, NewRetTypes);
+}
- for (BasicBlock *IndDest : CBR->getIndirectDests()) {
- if (!Visited.insert(IndDest).second)
+/// Create the new inline assembly call with converted constraints.
+static CallInst *CreateNewInlineAsm(
+ InlineAsm *IA, const std::string &NewConstraintStr, Type *NewRetTy,
+ const SmallVectorImpl<Type *> &NewArgTypes,
+ const SmallVectorImpl<Value *> &NewArgs,
+ const SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
+ CallBase *CB, IRBuilder<> &Builder, LLVMContext &Context) {
+ FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false);
+ InlineAsm *NewIA = InlineAsm::get(
+ NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(),
+ IA->isAlignStack(), IA->getDialect(), IA->canThrow());
+
+ CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs);
+ NewCall->setCallingConv(CB->getCallingConv());
+ NewCall->setAttributes(CB->getAttributes());
+ NewCall->setDebugLoc(CB->getDebugLoc());
----------------
nikic wrote:
Generalize this to copying all metadata? I believe at least `!srcloc` is relevant for inline asm calls.
https://github.com/llvm/llvm-project/pull/181973
More information about the cfe-commits
mailing list