[clang] [llvm] [inlineasm] Add special support for "rm" inline asm constraints (PR #181973)
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 19 07:58:00 PDT 2026
================
@@ -1,204 +1,464 @@
-//===-- InlineAsmPrepare - Prepare inline asm for code gen ----------------===//
+//===-- InlineAsmPrepare - Prepare inline asm for code generation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// This pass lowers callbrs in LLVM IR in order to to assist SelectionDAG's
-// codegen.
+// This pass lowers callbrs and inline asm in LLVM IR in order to assist
+// SelectionDAG's codegen.
//
-// In particular, this pass assists in inserting register copies for the output
-// values of a callbr along the edges leading to the indirect target blocks.
-// Though the output SSA value is defined by the callbr instruction itself in
-// the IR representation, the value cannot be copied to the appropriate virtual
-// registers prior to jumping to an indirect label, since the jump occurs
-// within the user-provided assembly blob.
+// CallBrInst:
//
-// Instead, those copies must occur separately at the beginning of each
-// indirect target. That requires that we create a separate SSA definition in
-// each of them (via llvm.callbr.landingpad), and may require splitting
-// critical edges so we have a location to place the intrinsic. Finally, we
-// remap users of the original callbr output SSA value to instead point to the
-// appropriate llvm.callbr.landingpad value.
+// - Assists in inserting register copies for the output values of a callbr
+// along the edges leading to the indirect target blocks. Though the output
+// SSA value is defined by the callbr instruction itself in the IR
+// representation, the value cannot be copied to the appropriate virtual
+// registers prior to jumping to an indirect label, since the jump occurs
+// within the user-provided assembly blob.
//
-// Ideally, this could be done inside SelectionDAG, or in the
-// MachineInstruction representation, without the use of an IR-level intrinsic.
-// But, within the current framework, it’s simpler to implement as an IR pass.
-// (If support for callbr in GlobalISel is implemented, it’s worth considering
-// whether this is still required.)
+// Instead, those copies must occur separately at the beginning of each
+// indirect target. That requires that we create a separate SSA definition
+// in each of them (via llvm.callbr.landingpad), and may require splitting
+// critical edges so we have a location to place the intrinsic. Finally, we
+// remap users of the original callbr output SSA value to instead point to
+// the appropriate llvm.callbr.landingpad value.
+//
+// Ideally, this could be done inside SelectionDAG, or in the
+// MachineInstruction representation, without the use of an IR-level
+// intrinsic. But, within the current framework, it’s simpler to implement
+// as an IR pass. (If support for callbr in GlobalISel is implemented,
+// it’s worth considering whether this is still required.)
+//
+// InlineAsm:
+//
+// - Prepares inline assembly for code generation with the fast register
+// allocator. In particular, it makes "rm" (register-or-memory) constraints
+// prefer the "m" constraint (the front-end opts for the "r" constraint),
+// simplifying register allocation by forcing operands to memory locations.
+// The other register allocators are already equipped to handle folding
+// registers, so they don't need the default changed.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/InlineAsmPrepare.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/IR/BasicBlock.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <sstream>
using namespace llvm;
#define DEBUG_TYPE "inline-asm-prepare"
-static bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
-static bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
- DominatorTree &DT);
-static void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
- SSAUpdater &SSAUpdate);
-static SmallVector<CallBrInst *, 2> FindCallBrs(Function &F);
-
namespace {
class InlineAsmPrepare : public FunctionPass {
public:
InlineAsmPrepare() : FunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ }
bool runOnFunction(Function &F) override;
+
static char ID;
};
-} // end anonymous namespace
+char InlineAsmPrepare::ID = 0;
-PreservedAnalyses InlineAsmPreparePass::run(Function &F,
- FunctionAnalysisManager &FAM) {
- bool Changed = false;
- SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(F);
+} // end anonymous namespace
- if (CBRs.empty())
- return PreservedAnalyses::all();
+INITIALIZE_PASS_BEGIN(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(InlineAsmPrepare, DEBUG_TYPE, "Prepare inline asm insts",
+ false, false)
- auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+FunctionPass *llvm::createInlineAsmPreparePass() {
+ return new InlineAsmPrepare();
+}
- Changed |= SplitCriticalEdges(CBRs, DT);
- Changed |= InsertIntrinsicCalls(CBRs, DT);
+//===----------------------------------------------------------------------===//
+// Process InlineAsm instructions
+//===----------------------------------------------------------------------===//
- if (!Changed)
- return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- return PA;
+/// The inline asm constraint allows both register and memory.
+static bool isRegMemConstraint(StringRef Constraint) {
+ return Constraint.size() == 2 && (Constraint == "rm" || Constraint == "mr");
}
-char InlineAsmPrepare::ID = 0;
-INITIALIZE_PASS_BEGIN(InlineAsmPrepare, "inline-asm-prepare",
- "Prepare inline asm insts", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(InlineAsmPrepare, "inline-asm-prepare",
- "Prepare inline asm insts", false, false)
+/// Tag every "rm"/"mr" constraint (input or output) with '*' to signify that
+/// it defaults to a memory location.
+static std::tuple<std::string, bool, bool>
+convertConstraintsToMemory(StringRef ConstraintStr) {
+ std::vector<std::string> Constraints;
+ Constraints.reserve(ConstraintStr.count(',') + 1);
+
+ std::istringstream OS(ConstraintStr.str());
+ std::string Constraint;
+ while (std::getline(OS, Constraint, ','))
+ Constraints.push_back(Constraint);
+
+ bool HasRegMem = false;
+ bool MayWriteMem = false;
+ for (auto &Constraint : Constraints) {
+ std::string NewConstraint;
+
+ auto I = Constraint.begin(), E = Constraint.end();
+ bool HasIndirect = false;
+
+ if (*I == '=') {
+ if (Constraint.size() == 1)
+ return {};
+ ++I;
+ NewConstraint += '=';
+ }
+ if (*I == '*') {
+ if (Constraint.size() == 1)
+ return {};
+ ++I;
+ NewConstraint += '*';
+ HasIndirect = true;
+ }
+ if (*I == '+') {
+ if (Constraint.size() == 1)
+ return {};
+ ++I;
+ NewConstraint += '+';
+ }
-FunctionPass *llvm::createInlineAsmPreparePass() {
- return new InlineAsmPrepare();
+ if (isRegMemConstraint(std::string(I, E))) {
+ HasRegMem = true;
+ MayWriteMem = true;
+ if (!HasIndirect)
+ NewConstraint += '*';
+ }
+
+ NewConstraint += std::string(I, E);
+ Constraint = NewConstraint;
+ }
+
+ return {llvm::join(Constraints, ","), HasRegMem, MayWriteMem};
}
-void InlineAsmPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTreeWrapperPass>();
+/// Build a map of tied constraints. TiedOutput[i] = j means Constraint i is an
+/// input tied to output constraint j.
+static void
+buildTiedConstraintMap(const InlineAsm::ConstraintInfoVector &Constraints,
+ SmallVectorImpl<int> &TiedOutput) {
+ for (unsigned I = 0, E = Constraints.size(); I != E; ++I) {
+ const InlineAsm::ConstraintInfo &C = Constraints[I];
+ if (C.Type == InlineAsm::isOutput && C.hasMatchingInput()) {
+ int InputIdx = C.MatchingInput;
+ if (InputIdx >= 0 && InputIdx < (int)Constraints.size())
+ TiedOutput[InputIdx] = I;
+ }
+
+ if (C.Type == InlineAsm::isInput && C.hasMatchingInput()) {
+ int OutputIdx = C.MatchingInput;
+ if (OutputIdx >= 0 && OutputIdx < (int)Constraints.size())
+ TiedOutput[I] = OutputIdx;
+ }
+ }
}
-SmallVector<CallBrInst *, 2> FindCallBrs(Function &F) {
- SmallVector<CallBrInst *, 2> CBRs;
- for (BasicBlock &BB : F)
- if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
- if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
- CBRs.push_back(CBR);
- return CBRs;
+/// Process an output constraint, creating allocas for converted constraints.
+static void processOutputConstraint(
+ const InlineAsm::ConstraintInfo &C, Type *RetTy, unsigned OutputIdx,
+ IRBuilder<> &EntryBuilder, SmallVectorImpl<Value *> &NewArgs,
+ SmallVectorImpl<Type *> &NewRetTypes,
+ SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs,
+ SmallVectorImpl<std::pair<AllocaInst *, Type *>> &OutputAllocas,
+ unsigned ConstraintIdx) {
+ Type *SlotTy = RetTy;
+ if (StructType *ST = dyn_cast<StructType>(RetTy))
+ SlotTy = ST->getElementType(OutputIdx);
+
+ if (C.hasRegMemConstraints()) {
+ // Converted to memory constraint. Create alloca and pass pointer as
+ // argument.
+ AllocaInst *Slot = EntryBuilder.CreateAlloca(SlotTy, nullptr, "asm_mem");
+ NewArgs.push_back(Slot);
+ ElementTypeAttrs.push_back({NewArgs.size() - 1, SlotTy});
+ OutputAllocas[ConstraintIdx] = std::make_pair(Slot, SlotTy);
+ // No return value for this output since it's now an out-parameter.
+ } else {
+ // Unchanged, still an output return value.
+ NewRetTypes.push_back(SlotTy);
+ }
}
-bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
- bool Changed = false;
- CriticalEdgeSplittingOptions Options(&DT);
- Options.setMergeIdenticalEdges();
+/// Process an input constraint, handling tied constraints and conversions.
+static void processInputConstraint(
+ const InlineAsm::ConstraintInfo &C, Value *ArgVal, Type *InputElementType,
+ ArrayRef<int> TiedOutput,
+ ArrayRef<std::pair<AllocaInst *, Type *>> OutputAllocas,
+ unsigned ConstraintIdx, IRBuilder<> &Builder, IRBuilder<> &EntryBuilder,
+ SmallVectorImpl<Value *> &NewArgs,
+ SmallVectorImpl<std::pair<unsigned, Type *>> &ElementTypeAttrs) {
+ Type *ArgTy = ArgVal->getType();
+
+ if (TiedOutput[ConstraintIdx] != -1) {
+ int MatchIdx = TiedOutput[ConstraintIdx];
+ if (auto [Slot, _] = OutputAllocas[MatchIdx]; Slot) {
+ // The matched output was converted to memory. Store this input into the
+ // alloca.
+ Builder.CreateStore(ArgVal, Slot);
+
+ // Pass the alloca pointer as the argument, instead of ArgVal. This
+ // ensures the tied "0" constraint matches the "*m" output.
+ NewArgs.push_back(Slot);
+ return;
+ }
+ }
- // The indirect destination might be duplicated between another parameter...
- // %0 = callbr ... [label %x, label %x]
- // ...hence MergeIdenticalEdges and AllowIndentical edges, but we don't need
- // to split the default destination if it's duplicated between an indirect
- // destination...
- // %1 = callbr ... to label %x [label %x]
- // ...hence starting at 1 and checking against successor 0 (aka the default
- // destination).
- for (CallBrInst *CBR : CBRs)
- for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
- if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
- isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
- if (SplitKnownCriticalEdge(CBR, i, Options))
- Changed = true;
- return Changed;
+ if (C.hasRegMemConstraints() && !C.isIndirect) {
+ // Converted to memory constraint. Create alloca, store input, pass pointer
+ // as argument.
+ AllocaInst *Slot = EntryBuilder.CreateAlloca(ArgTy, nullptr, "asm_mem");
+ Builder.CreateStore(ArgVal, Slot);
+ NewArgs.push_back(Slot);
+ ElementTypeAttrs.push_back({NewArgs.size() - 1, ArgTy});
+ } else {
+ // Unchanged
+ NewArgs.push_back(ArgVal);
+ if (InputElementType)
+ ElementTypeAttrs.push_back({NewArgs.size() - 1, InputElementType});
+ }
}
-bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT) {
- bool Changed = false;
- SmallPtrSet<const BasicBlock *, 4> Visited;
- IRBuilder<> Builder(CBRs[0]->getContext());
- for (CallBrInst *CBR : CBRs) {
- if (!CBR->getNumIndirectDests())
- continue;
+/// Build the return type from the collected return types.
+static Type *buildReturnType(ArrayRef<Type *> NewRetTypes,
+ LLVMContext &Context) {
+ if (NewRetTypes.empty())
+ return Type::getVoidTy(Context);
+
+ if (NewRetTypes.size() == 1)
+ return NewRetTypes[0];
- SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(CBR->getType(), CBR->getName());
- SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
- SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+ return StructType::get(Context, NewRetTypes);
+}
+
+/// Create the new inline assembly call with converted constraints.
+static CallInst *
+createNewInlineAsm(InlineAsm *IA, const std::string &NewConstraintStr,
+ Type *NewRetTy, ArrayRef<Value *> NewArgs,
+ ArrayRef<std::pair<unsigned, Type *>> ElementTypeAttrs,
+ CallBase *CB, bool MayWriteMem, IRBuilder<> &Builder,
+ LLVMContext &Context) {
+ SmallVector<Type *> NewArgTypes;
+ for (const auto *NewArg : NewArgs)
+ NewArgTypes.push_back(NewArg->getType());
+
+ FunctionType *NewFTy = FunctionType::get(NewRetTy, NewArgTypes, false);
+ InlineAsm *NewIA = InlineAsm::get(
+ NewFTy, IA->getAsmString(), NewConstraintStr, IA->hasSideEffects(),
+ IA->isAlignStack(), IA->getDialect(), IA->canThrow());
+
+ CallInst *NewCall = Builder.CreateCall(NewFTy, NewIA, NewArgs);
+ NewCall->setCallingConv(CB->getCallingConv());
+ NewCall->setAttributes(CB->getAttributes());
+ NewCall->setDebugLoc(CB->getDebugLoc());
----------------
nikic wrote:
setDebugLoc should already be covered by copyMetadata.
https://github.com/llvm/llvm-project/pull/181973
More information about the cfe-commits
mailing list