[llvm-branch-commits] [llvm-branch] r117425 [4/9] - in /llvm/branches/wendling/eh: ./ autoconf/ autoconf/m4/ bindings/ada/ bindings/ocaml/llvm/ bindings/ocaml/transforms/scalar/ cmake/ cmake/modules/ docs/ docs/CommandGuide/ docs/tutorial/ examples/ examples/BrainF/ examples/ExceptionDemo/ examples/Fibonacci/ examples/Kaleidoscope/Chapter7/ examples/ModuleMaker/ include/llvm-c/ include/llvm-c/Transforms/ include/llvm/ include/llvm/ADT/ include/llvm/Analysis/ include/llvm/Assembly/ include/llvm/Bitcode/ include/llvm/CodeGen/ i...
Bill Wendling
isanbard at gmail.com
Tue Oct 26 17:48:11 PDT 2010
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMFastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMFastISel.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMFastISel.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMFastISel.cpp Tue Oct 26 19:48:03 2010
@@ -14,26 +14,44 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+static cl::opt<bool>
+DisableARMFastISel("disable-arm-fast-isel",
+ cl::desc("Turn off experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
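Since this is a hidden cl::opt, the switch is still accepted by any tool that links in the ARM backend; a hedged usage sketch (the invocation is assumed, not part of the commit):

    llc -O0 -march=arm -disable-arm-fast-isel test.ll -o test.s

With the flag set, ARM::createFastISel at the bottom of this file returns 0, so selection falls back to the normal SelectionDAG path.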
+
namespace {
class ARMFastISel : public FastISel {
@@ -41,24 +59,1680 @@
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ ARMFunctionInfo *AFI;
+
+ // Convenience variables to avoid some queries.
+ bool isThumb;
+ LLVMContext *Context;
public:
- explicit ARMFastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ Context = &funcInfo.Fn->getContext();
}
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
#include "ARMGenFastISel.inc"
- };
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectSIToFP(const Instruction *I);
+ bool SelectFPToSI(const Instruction *I);
+ bool SelectSDiv(const Instruction *I);
+ bool SelectSRem(const Instruction *I);
+ bool SelectCall(const Instruction *I);
+ bool SelectSelect(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, EVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, EVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Base, int Offset);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Base, int Offset);
+ bool ARMComputeRegOffset(const Value *Obj, unsigned &Base, int &Offset);
+ void ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT);
+ unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+ unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+ unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+ unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+ unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+
+ // Call handling routines.
+ private:
+ bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+ unsigned &ResultReg);
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+ bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes);
+ bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+ // OptionalDef handling routines.
+ private:
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+};
} // end anonymous namespace
-// #include "ARMGenCallingConv.inc"
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets *CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+// If the machine instruction is predicable, add the default predicate
+// operands; if it takes an optional CC operand, add the default for that too.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate?
+ if (TII.isPredicable(MI))
+ AddDefaultPred(MIB);
+
+  // Do we optionally set a predicate? CPSR is true iff the optional def is
+  // CPSR; all other optional defs in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
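For reference, the AddDefault* helpers used above come from ARMBaseInstrInfo.h; a paraphrased sketch of their definitions as they stood around this revision (verify against the header):

    // Appends the "always" predicate: an ARMCC::AL immediate plus a zero
    // (i.e. absent) condition register.
    static inline const MachineInstrBuilder &
    AddDefaultPred(const MachineInstrBuilder &MIB) {
      return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
    }

    // Appends a zero CC register, leaving the flags untouched; AddDefaultT1CC
    // instead appends an explicit CPSR def for Thumb1-style flag-setting ops.
    static inline const MachineInstrBuilder &
    AddDefaultCC(const MachineInstrBuilder &MIB) {
      return MIB.addReg(0);
    }

So AddOptionalDefs leaves each emitted instruction unpredicated and non-flag-setting unless the opcode specifically requires CPSR.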
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+// TODO: Don't worry about 64-bit now, but when this is fixed remove the
+// checks from the various callers.
+unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
+ if (VT.getSimpleVT().SimpleTy == MVT::f64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
+ if (VT.getSimpleVT().SimpleTy == MVT::i64) return 0;
+
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(SrcReg));
+ return MoveReg;
+}
+
+// Materialize an fp constant. Use a VFP3 immediate-move instruction if the
+// value can be encoded that way, otherwise load the value from the constant
+// pool.
+unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
+ const APFloat Val = CFP->getValueAPF();
+ bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64;
+
+ // This checks to see if we can use VFP3 instructions to materialize
+ // a constant, otherwise we have to go through the constant pool.
+ if (TLI.isFPImmLegal(Val, VT)) {
+ unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addFPImm(CFP));
+ return DestReg;
+ }
+
+ // Require VFP2 for loading fp constants.
+ if (!Subtarget->hasVFP2()) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(CFP->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+ // The extra reg is for addrmode5.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0));
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+ // For now 32-bit only.
+ if (VT.getSimpleVT().SimpleTy != MVT::i32) return false;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci), DestReg)
+ .addConstantPoolIndex(Idx));
+ else
+ // The extra reg and immediate are for addrmode2.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp), DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+
+ return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+ // For now 32-bit only.
+ if (VT.getSimpleVT().SimpleTy != MVT::i32) return 0;
+
+ Reloc::Model RelocM = TM.getRelocationModel();
+
+ // TODO: No external globals for now.
+ if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;
+
+ // TODO: Need more magic for ARM PIC.
+ if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createConstPoolEntryUId();
+ ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
+ ARMCP::CPValue, PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (isThumb) {
+ unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra reg and immediate are for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0);
+ }
+ AddOptionalDefs(MIB);
+ return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return ARMMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return ARMMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return ARMMaterializeInt(C, VT);
+
+ return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ EVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ // This will get lowered later into the correct offsets and registers
+ // via rewriteXFrameIndex.
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ unsigned ResultReg = createResultReg(RC);
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addFrameIndex(SI->second)
+ .addImm(0));
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
+ VT = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (VT == MVT::Other || !VT.isSimple()) return false;
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type that can be sign- or zero-extended to a basic operation,
+  // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the Reg+Offset to get to an object.
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Base,
+ int &Offset) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
+ return false;
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast: {
+ // Look through bitcasts.
+ return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ }
+ case Instruction::IntToPtr: {
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return ARMComputeRegOffset(U->getOperand(0), Base, Offset);
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ int SavedOffset = Offset;
+ unsigned SavedBase = Base;
+ int TmpOffset = Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+ i != e; ++i, ++GTI) {
+ const Value *Op = *i;
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Op);
+ do {
+ Op = Worklist.pop_back_val();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ } else if (isa<AddOperator>(Op) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Add the other operand back to the work list.
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+ } else
+ goto unsupported_gep;
+ } while (!Worklist.empty());
+ }
+ }
+
+ // Try to grab the base operand now.
+ Offset = TmpOffset;
+ if (ARMComputeRegOffset(U->getOperand(0), Base, Offset)) return true;
+
+ // We failed, restore everything and try the other options.
+ Offset = SavedOffset;
+ Base = SavedBase;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ unsigned Reg = TargetMaterializeAlloca(AI);
+
+ if (Reg == 0) return false;
+
+ Base = Reg;
+ return true;
+ }
+ }
+
+ // Materialize the global variable's address into a reg which can
+ // then be used later to load the variable.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+ if (Tmp == 0) return false;
+
+ Base = Tmp;
+ return true;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ if (Base == 0) Base = getRegForValue(Obj);
+ return Base != 0;
+}
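A worked example of the GEP folding above (types hypothetical, not from the commit): the constant indices collapse into Offset, leaving only the base pointer in a register:

    struct S { int a; int pad; int b[4]; };
    // &s->b[1] appears as: getelementptr %S* %s, i32 0, i32 2, i32 1
    //  - the struct index walks TD.getStructLayout(S) to element offset 8
    //  - the array index adds 1 * TD.getTypeAllocSize(i32) = 4
    // Result: Base = <vreg for %s> (via getRegForValue), Offset = 12, with
    // no address arithmetic emitted at all.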
+
+void ARMFastISel::ARMSimplifyRegOffset(unsigned &Base, int &Offset, EVT VT) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+ bool needsLowering = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Unhandled load/store type!");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Offset & 0xfff) != Offset);
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ // Floating point operands handle 8-bit offsets.
+ needsLowering = ((Offset & 0xff) != Offset);
+ break;
+ }
+
+ // Since the offset is too large for the load/store instruction
+ // get the reg+offset into a register.
+ if (needsLowering) {
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned PredReg = 0;
+
+ TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+ ARM::GPRRegisterClass;
+ unsigned BaseReg = createResultReg(RC);
+
+ if (!isThumb)
+ emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BaseReg, Base, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ BaseReg, Base, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ }
+ Offset = 0;
+ Base = BaseReg;
+ }
+}
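A short worked example of the check above (offsets hypothetical): for an i32 access the immediate field is 12 bits, so 4095 passes while 4100 does not:

    // Offset = 4095: 4095 & 0xfff == 4095 -> fits, used directly as [Base, #4095]
    // Offset = 4100: 4100 & 0xfff == 4    -> needs lowering;
    //   emit{ARM,T2}RegPlusImmediate folds Base+4100 into a fresh BaseReg
    //   and the access then becomes [BaseReg, #0]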
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
+ unsigned Base, int Offset) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+ TargetRegisterClass *RC;
+ bool isFloat = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ // This is mostly going to be Neon/vector support.
+ return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRB;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ break;
+ case MVT::f32:
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ isFloat = true;
+ break;
+ case MVT::f64:
+ Opc = ARM::VLDRD;
+ RC = TLI.getRegClassFor(VT);
+ isFloat = true;
+ break;
+ }
+
+ ResultReg = createResultReg(RC);
+
+ ARMSimplifyRegOffset(Base, Offset, VT);
+
+  // addrmode5 expects the offset pre-divided by 4 (the addressing mode scales
+  // it back up). The selection dag does this division, so do it here as well.
+ if (isFloat)
+ Offset /= 4;
+
+  // The Thumb and floating point instructions both take 2 operands; the ARM
+  // integer form takes an extra register.
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Base).addImm(Offset));
+ return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+ // Verify we have a legal type before going any further.
+ EVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Base = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(0), Base, Offset))
+ return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Base, Offset)) return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
+ unsigned Base, int Offset) {
+ unsigned StrOpc;
+ bool isFloat = false;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8:
+ StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRB;
+ break;
+ case MVT::i16:
+ StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ break;
+ case MVT::i32:
+ StrOpc = isThumb ? ARM::t2STRi12 : ARM::STR;
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ isFloat = true;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ isFloat = true;
+ break;
+ }
+
+ ARMSimplifyRegOffset(Base, Offset, VT);
+
+  // addrmode5 expects the offset pre-divided by 4 (the addressing mode scales
+  // it back up). The selection dag does this division, so do it here as well.
+ if (isFloat)
+ Offset /= 4;
+
+  // The Thumb addressing mode has its operands swapped relative to the ARM
+  // one, and the floating point form takes only two operands.
+ if (isFloat || isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg).addReg(Base).addImm(Offset));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc))
+ .addReg(SrcReg).addReg(Base).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Yay type legalization
+ EVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Base = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(1), Base, Offset))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Base, Offset)) return false;
+
+ return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // Needs two compares...
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UEQ:
+ default:
+ assert(false && "Unhandled CmpInst::Predicate!");
+ return ARMCC::AL;
+ case CmpInst::ICMP_EQ:
+ case CmpInst::FCMP_OEQ:
+ return ARMCC::EQ;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::FCMP_OGT:
+ return ARMCC::GT;
+ case CmpInst::ICMP_SGE:
+ case CmpInst::FCMP_OGE:
+ return ARMCC::GE;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::FCMP_UGT:
+ return ARMCC::HI;
+ case CmpInst::FCMP_OLT:
+ return ARMCC::MI;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::FCMP_OLE:
+ return ARMCC::LS;
+ case CmpInst::FCMP_ORD:
+ return ARMCC::VC;
+ case CmpInst::FCMP_UNO:
+ return ARMCC::VS;
+ case CmpInst::FCMP_UGE:
+ return ARMCC::PL;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::FCMP_ULT:
+ return ARMCC::LT;
+ case CmpInst::ICMP_SLE:
+ case CmpInst::FCMP_ULE:
+ return ARMCC::LE;
+ case CmpInst::FCMP_UNE:
+ case CmpInst::ICMP_NE:
+ return ARMCC::NE;
+ case CmpInst::ICMP_UGE:
+ return ARMCC::HS;
+ case CmpInst::ICMP_ULT:
+ return ARMCC::LO;
+ }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+ // TODO: Try to avoid the re-computation in some places.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (CondReg == 0) return false;
+
+ // Re-set the flags just in case.
+ unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(1));
+
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ EVT VT;
+ const Type *Ty = CI->getOperand(0)->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned CmpOpc;
+ unsigned CondReg;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ // TODO: Verify compares.
+ case MVT::f32:
+ CmpOpc = ARM::VCMPES;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::f64:
+ CmpOpc = ARM::VCMPED;
+ CondReg = ARM::FPSCR;
+ break;
+ case MVT::i32:
+ CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ CondReg = ARM::CPSR;
+ break;
+ }
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ unsigned Arg1 = getRegForValue(CI->getOperand(0));
+ if (Arg1 == 0) return false;
+
+ unsigned Arg2 = getRegForValue(CI->getOperand(1));
+ if (Arg2 == 0) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(Arg1).addReg(Arg2));
+
+  // For floating point we need to transfer the FP status flags into CPSR
+  // (via FMSTAT) so that branches can then use them.
+ if (isFloat)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::FMSTAT)));
+
+ // Now set a register based on the comparison. Explicitly set the predicates
+ // here.
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+ TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
+ unsigned DestReg = createResultReg(RC);
+ Constant *Zero
+ = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
+ .addReg(ZeroReg).addImm(1)
+ .addImm(ARMPred).addReg(CondReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
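Putting the pieces together, a floating-point compare selected here expands to roughly the following sequence (registers illustrative; ARM mode with VFP2 assumed):

    // for:  %r = fcmp oeq float %a, %b
    //   VCMPES  s0, s1           compare; result flags land in FPSCR
    //   FMSTAT                   transfer the FP status flags for branching
    //   ldr     rZ, <constpool>  TargetMaterializeConstant(0)
    //   MOVCCi  rD, rZ, #1, eq   rD = 0, conditionally overwritten with 1

UpdateValueMap then records rD as the i1 result of the compare.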
+
+bool ARMFastISel::SelectFPExt(const Instruction *I) {
+ // Make sure we have VFP and that we're extending float to double.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!I->getType()->isDoubleTy() ||
+ !V->getType()->isFloatTy()) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::DPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTDS), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
+ // Make sure we have VFP and that we're truncating double to float.
+ if (!Subtarget->hasVFP2()) return false;
+
+ Value *V = I->getOperand(0);
+ if (!(I->getType()->isFloatTy() &&
+ V->getType()->isDoubleTy())) return false;
+
+ unsigned Op = getRegForValue(V);
+ if (Op == 0) return false;
+
+ unsigned Result = createResultReg(ARM::SPRRegisterClass);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VCVTSD), Result)
+ .addReg(Op));
+ UpdateValueMap(I, Result);
+ return true;
+}
+
+bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ EVT DstVT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+  // The conversion routine works on fp-reg to fp-reg, and the operand above
+  // was an integer; move it to the fp registers if possible.
+ unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ if (FP == 0) return false;
+ unsigned Opc;
+ if (Ty->isFloatTy()) Opc = ARM::VSITOS;
+ else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ else return 0;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(FP));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+ // Make sure we have VFP.
+ if (!Subtarget->hasVFP2()) return false;
+
+ EVT DstVT;
+ const Type *RetTy = I->getType();
+ if (!isTypeLegal(RetTy, DstVT))
+ return false;
+
+ unsigned Op = getRegForValue(I->getOperand(0));
+ if (Op == 0) return false;
+
+ unsigned Opc;
+ const Type *OpTy = I->getOperand(0)->getType();
+ if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
+ else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ else return 0;
+
+ // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ ResultReg)
+ .addReg(Op));
+
+ // This result needs to be in an integer register, but the conversion only
+ // takes place in fp-regs.
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
+ if (IntReg == 0) return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSelect(const Instruction *I) {
+ EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
+ if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
+ return false;
+
+ // Things need to be register sized for register moves.
+ if (VT.getSimpleVT().SimpleTy != MVT::i32) return false;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
+ unsigned CondReg = getRegForValue(I->getOperand(0));
+ if (CondReg == 0) return false;
+ unsigned Op1Reg = getRegForValue(I->getOperand(1));
+ if (Op1Reg == 0) return false;
+ unsigned Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addImm(1));
+ unsigned ResultReg = createResultReg(RC);
+ unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectSDiv(const Instruction *I) {
+ EVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+  // If we have integer div support we should have selected this automagically.
+  // In case we have a real miss, go ahead and return false; we'll pick it up
+  // later.
+ if (Subtarget->hasDivide()) return false;
+
+ // Otherwise emit a libcall.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SDIV_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SDIV_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SDIV_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SDIV_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SDIV_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectSRem(const Instruction *I) {
+ EVT VT;
+ const Type *Ty = I->getType();
+ if (!isTypeLegal(Ty, VT))
+ return false;
+
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i8)
+ LC = RTLIB::SREM_I8;
+ else if (VT == MVT::i16)
+ LC = RTLIB::SREM_I16;
+ else if (VT == MVT::i32)
+ LC = RTLIB::SREM_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::SREM_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::SREM_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
+
+ return ARMEmitLibcall(I, LC);
+}
+
+bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT VT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we want to use NEON for our fp
+ // operations, but can't figure out how to. Just use the vfp instructions
+ // if we have them.
+ // FIXME: It'd be nice to use NEON instructions.
+ const Type *Ty = I->getType();
+ bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+ if (isFloat && !Subtarget->hasVFP2())
+ return false;
+
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
+ unsigned Opc;
+ bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64 ||
+ VT.getSimpleVT().SimpleTy == MVT::i64;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::FADD:
+ Opc = is64bit ? ARM::VADDD : ARM::VADDS;
+ break;
+ case ISD::FSUB:
+ Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
+ break;
+ case ISD::FMUL:
+ Opc = is64bit ? ARM::VMULD : ARM::VMULS;
+ break;
+ }
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Op1).addReg(Op2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Call Handling Code
+
+bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
+ EVT SrcVT, unsigned &ResultReg) {
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
+
+ if (RR != 0) {
+ ResultReg = RR;
+ return true;
+ } else
+ return false;
+}
+
+// This is largely taken directly from CCAssignFnForNode - we don't support
+// varargs in FastISel so that part has been removed.
+// TODO: We may not support all of this.
+CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::Fast:
+ // Ignore fastcc. Silence compiler warnings.
+ (void)RetFastCC_ARM_APCS;
+ (void)FastCC_ARM_APCS;
+ // Fallthrough
+ case CallingConv::C:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
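A minimal usage sketch of the dispatch above (subtarget assumptions noted in the comments):

    // Assuming an AAPCS subtarget with VFP2 and FloatABIType == FloatABI::Hard:
    CCAssignFn *ArgFn = CCAssignFnForCall(CallingConv::C, /*Return=*/false);
    // -> CC_ARM_AAPCS_VFP: fp arguments are assigned to s0-s15 / d0-d7
    CCAssignFn *RetFn = CCAssignFnForCall(CallingConv::C, /*Return=*/true);
    // -> RetCC_ARM_AAPCS_VFP: fp results come back in s0 / d0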
+
+bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, false, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackDown))
+ .addImm(NumBytes));
+
+ // Process the args.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ EVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON parameters yet.
+ if (VA.getLocVT().isVector() && VA.getLocVT().getSizeInBits() > 64)
+ return false;
+
+ // Handle arg promotion, etc.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt: {
+ bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::ZExt: {
+ bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
+ Emitted = true;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::AExt: {
+ bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+ if (!Emitted)
+ Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
+ Arg, ArgVT, Arg);
+
+ assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ case CCValAssign::BCvt: {
+ unsigned BC = FastEmit_r(ArgVT.getSimpleVT(),
+ VA.getLocVT().getSimpleVT(),
+ ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
+ assert(BC != 0 && "Failed to emit a bitcast!");
+ Arg = BC;
+ ArgVT = VA.getLocVT();
+ break;
+ }
+ default: llvm_unreachable("Unknown arg promotion!");
+ }
+
+ // Now copy/store arg to correct locations.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg())
+ .addReg(Arg);
+ RegArgs.push_back(VA.getLocReg());
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64) return false;
+
+ CCValAssign &NextVA = ArgLocs[++i];
+
+ // TODO: Only handle register args for now.
+ if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRRD), VA.getLocReg())
+ .addReg(NextVA.getLocReg(), RegState::Define)
+ .addReg(Arg));
+ RegArgs.push_back(VA.getLocReg());
+ RegArgs.push_back(NextVA.getLocReg());
+ } else {
+ assert(VA.isMemLoc());
+ // Need to store on the stack.
+ unsigned Base = ARM::SP;
+ int Offset = VA.getLocMemOffset();
+
+ if (!ARMEmitStore(ArgVT, Arg, Base, Offset)) return false;
+ }
+ }
+ return true;
+}
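The needsCustom() path above covers the soft-float AAPCS case, where an f64 argument travels in a core-register pair; a sketch of the emitted split (registers illustrative):

    // VMOVRRD defines two GPRs from one d-register:
    //   VMOVRRD r2, r3, d0    r2/r3 now carry the two halves of the f64
    // Both location registers are pushed onto RegArgs so they become
    // implicit uses of the call instruction.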
+
+bool ARMFastISel::FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes) {
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0));
+
+ // Now the return value.
+ if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));
+
+ // Copy all of the result registers out of their specified physreg.
+ if (RVLocs.size() == 2 && RetVT.getSimpleVT().SimpleTy == MVT::f64) {
+ // For this move we copy into two registers and then move into the
+ // double fp reg we want.
+ EVT DestVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ unsigned ResultReg = createResultReg(DstRC);
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVDRR), ResultReg)
+ .addReg(RVLocs[0].getLocReg())
+ .addReg(RVLocs[1].getLocReg()));
+
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[1].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ } else {
+ assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
+ EVT CopyVT = RVLocs[0].getValVT();
+ TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ unsigned ResultReg = createResultReg(DstRC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
+ // Finally update the result.
+ UpdateValueMap(I, ResultReg);
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ if (F.isVarArg())
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (VA.getValVT() != TLI.getValueType(RV->getType()))
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(RetOpc)));
+ return true;
+}
+
+// A quick function that emits a call to the named libcall for the
+// Instruction I, passing I's operands as the call arguments. We can assume
+// that we can emit a call for any libcall we can produce. This is an
+// abridged version of the full call infrastructure since we won't need to
+// worry about things like computed function pointers or strange arguments
+// at call sites.
+// TODO: Try to unify this and the normal call bits for ARM, then try to unify
+// with X86.
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
+ CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ EVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // For now we're using BLX etc on the assumption that we have v5t ops.
+ if (!Subtarget->hasV5TOps()) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<EVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(I->getNumOperands());
+ ArgRegs.reserve(I->getNumOperands());
+ ArgVTs.reserve(I->getNumOperands());
+ ArgFlags.reserve(I->getNumOperands());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value *Op = I->getOperand(i);
+ unsigned Arg = getRegForValue(Op);
+ if (Arg == 0) return false;
+
+ const Type *ArgTy = Op->getType();
+ EVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT)) return false;
+
+ ISD::ArgFlagsTy Flags;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(Op);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+  // Issue the call: BLr9 for darwin, BL otherwise (tBLXi_r9/tBLXi for
+  // Thumb). This assumes v5 ops.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc;
+ if(isThumb)
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ else
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addExternalSymbol(TLI.getLibcallName(Call));
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+bool ARMFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm or worry about intrinsics yet.
+ if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+
+ // Only handle global variable Callees that are direct calls.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
+ return false;
+
+ // Check the calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ // TODO: Avoid some calling conventions?
+
+ // Let SDISel handle vararg functions.
+ const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ if (FTy->isVarArg())
+ return false;
+
+ // Handle *simple* calls for now.
+ const Type *RetTy = I->getType();
+ EVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ // For now we're using BLX etc on the assumption that we have v5t ops.
+ // TODO: Maybe?
+ if (!Subtarget->hasV5TOps()) return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<EVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+ Args.reserve(CS.arg_size());
+ ArgRegs.reserve(CS.arg_size());
+ ArgVTs.reserve(CS.arg_size());
+ ArgFlags.reserve(CS.arg_size());
+ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+ i != e; ++i) {
+ unsigned Arg = getRegForValue(*i);
+
+ if (Arg == 0)
+ return false;
+ ISD::ArgFlagsTy Flags;
+ unsigned AttrInd = i - CS.arg_begin() + 1;
+ if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+ Flags.setZExt();
+
+ // FIXME: Only handle *easy* calls for now.
+ if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+ CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+ CS.paramHasAttr(AttrInd, Attribute::ByVal))
+ return false;
+
+ const Type *ArgTy = (*i)->getType();
+ EVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT))
+ return false;
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*i);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Handle the arguments now that we've gotten them.
+ SmallVector<unsigned, 4> RegArgs;
+ unsigned NumBytes;
+ if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+ return false;
+
+  // Issue the call: BLr9 for darwin, BL otherwise (tBLXi_r9/tBLXi for
+  // Thumb). This assumes v5 ops.
+ // TODO: Turn this into the table of arm call ops.
+ MachineInstrBuilder MIB;
+ unsigned CallOpc;
+ if(isThumb)
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ else
+ CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+ MIB.addReg(RegArgs[i]);
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
+
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+
+}
+
+// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+ // No Thumb-1 for now.
+ if (isThumb && !AFI->isThumb2Function()) return false;
+
switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return SelectCmp(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectSIToFP(I);
+ case Instruction::FPToSI:
+ return SelectFPToSI(I);
+ case Instruction::FAdd:
+ return SelectBinaryOp(I, ISD::FADD);
+ case Instruction::FSub:
+ return SelectBinaryOp(I, ISD::FSUB);
+ case Instruction::FMul:
+ return SelectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return SelectSDiv(I);
+ case Instruction::SRem:
+ return SelectSRem(I);
+ case Instruction::Call:
+ return SelectCall(I);
+ case Instruction::Select:
+ return SelectSelect(I);
+ case Instruction::Ret:
+ return SelectRet(I);
default: break;
}
return false;
@@ -66,7 +1740,11 @@
namespace llvm {
llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
- // Turn it off for now. It's not quite ready.
+ // Completely untested on non-darwin.
+ const TargetMachine &TM = funcInfo.MF->getTarget();
+ const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget->isTargetDarwin() && !DisableARMFastISel)
+ return new ARMFastISel(funcInfo);
return 0;
}
}
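For context, ARM::createFastISel is reached through the standard TargetLowering hook; a paraphrased sketch of the wiring as it looked around this revision (not part of this hunk):

    // In ARMISelLowering.cpp:
    FastISel *
    ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
      return ARM::createFastISel(funcInfo);
    }

SelectionDAGISel consults this hook when fast-isel is enabled (e.g. at -O0) and falls back to the DAG path for any instruction where TargetSelectInstruction returns false.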
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMGlobalMerge.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMGlobalMerge.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMGlobalMerge.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMGlobalMerge.cpp Tue Oct 26 19:48:03 2010
@@ -12,7 +12,8 @@
// global). Such a transformation can significantly reduce the register pressure
// when many globals are involved.
//
-// For example, consider the code which touches several global variables at once:
+// For example, consider the code which touches several global variables at
+// once:
//
// static int foo[N], bar[N], baz[N];
//
@@ -48,7 +49,7 @@
// str r0, [r5], #4
//
// Note that we saved two registers here, almost "for free".
-// ===----------------------------------------------------------------------===//
+// ===---------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-global-merge"
#include "ARM.h"
@@ -67,7 +68,7 @@
using namespace llvm;
namespace {
- class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+ class ARMGlobalMerge : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// target type sizes.
const TargetLowering *TLI;
@@ -78,10 +79,10 @@
public:
static char ID; // Pass identification, replacement for typeid.
explicit ARMGlobalMerge(const TargetLowering *tli)
- : FunctionPass(&ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {}
virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function& F);
+ virtual bool runOnFunction(Function &F);
const char *getPassName() const {
return "Merge internal globals";
@@ -95,13 +96,11 @@
struct GlobalCmp {
const TargetData *TD;
- GlobalCmp(const TargetData *td):
- TD(td) { }
+ GlobalCmp(const TargetData *td) : TD(td) { }
- bool operator() (const GlobalVariable* GV1,
- const GlobalVariable* GV2) {
- const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
}
@@ -131,26 +130,23 @@
std::vector<const Type*> Tys;
std::vector<Constant*> Inits;
for (j = i; MergedSize < MaxOffset && j != e; ++j) {
- const Type* Ty = Globals[j]->getType()->getElementType();
+ const Type *Ty = Globals[j]->getType()->getElementType();
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
MergedSize += TD->getTypeAllocSize(Ty);
}
- StructType* MergedTy = StructType::get(M.getContext(), Tys);
- Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
GlobalValue::InternalLinkage,
MergedInit, "merged");
for (size_t k = i; k < j; ++k) {
- SmallVector<Constant*, 2> Idx;
- Idx.push_back(ConstantInt::get(Int32Ty, 0));
- Idx.push_back(ConstantInt::get(Int32Ty, k-i));
-
- Constant* GEP =
- ConstantExpr::getInBoundsGetElementPtr(MergedGV,
- &Idx[0], Idx.size());
-
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
}
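At the source level, the rewrite this loop performs amounts to the following (an illustration, not code from the patch):

    static int foo, bar;                     // before: two globals, two bases
    static struct { int foo, bar; } merged;  // after: one merged global
    // Each use of 'foo' is replaced with the constant inbounds GEP for
    // field 0 of 'merged' (and 'bar' with field 1) before the original
    // globals are erased.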
@@ -161,7 +157,7 @@
}
-bool ARMGlobalMerge::doInitialization(Module& M) {
+bool ARMGlobalMerge::doInitialization(Module &M) {
SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
const TargetData *TD = TLI->getTargetData();
unsigned MaxOffset = TLI->getMaximalGlobalOffset();
@@ -203,7 +199,7 @@
return Changed;
}
-bool ARMGlobalMerge::runOnFunction(Function& F) {
+bool ARMGlobalMerge::runOnFunction(Function &F) {
return false;
}
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMISelDAGToDAG.cpp Tue Oct 26 19:48:03 2010
@@ -46,6 +46,12 @@
/// instructions for SelectionDAG operations.
///
namespace {
+
+enum AddrMode2Type {
+ AM2_BASE, // Simple AM2 (+-imm12)
+ AM2_SHOP // Shifter-op AM2
+};
+
class ARMDAGToDAGISel : public SelectionDAGISel {
ARMBaseTargetMachine &TM;
@@ -72,52 +78,77 @@
SDNode *Select(SDNode *N);
- bool SelectShifterOperandReg(SDNode *Op, SDValue N, SDValue &A,
+ bool SelectShifterOperandReg(SDValue N, SDValue &A,
SDValue &B, SDValue &C);
- bool SelectAddrMode2(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Offset, SDValue &Opc);
+ bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
+
+ AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Opc);
+ bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
+ }
+
+ bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
+ }
+
+ bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &Opc) {
+ SelectAddrMode2Worker(N, Base, Offset, Opc);
+ // This always matches one way or another.
+ return true;
+ }
+
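The Base/ShOp/plain trio above is a worker/wrapper idiom: a single classifier does the matching once, and thin boolean facades expose each result class to the patterns that can encode it. Distilled with stand-in types (a sketch, not compiler code):

    enum MatchKind { MK_Base, MK_ShOp };
    // Stand-in classifier; the real worker is SelectAddrMode2Worker.
    static MatchKind classify(int N) { return (N & 1) ? MK_ShOp : MK_Base; }
    static bool matchBase(int N)   { return classify(N) == MK_Base; }
    static bool matchShOp(int N)   { return classify(N) == MK_ShOp; }
    static bool matchEither(int N) { classify(N); return true; } // never fails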
bool SelectAddrMode2Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectAddrMode3(SDValue N, SDValue &Base,
SDValue &Offset, SDValue &Opc);
bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
SDValue &Offset, SDValue &Opc);
- bool SelectAddrMode4(SDNode *Op, SDValue N, SDValue &Addr,
- SDValue &Mode);
- bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectAddrMode4(SDValue N, SDValue &Addr, SDValue &Mode);
+ bool SelectAddrMode5(SDValue N, SDValue &Base,
SDValue &Offset);
- bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align);
+ bool SelectAddrMode6(SDValue N, SDValue &Addr, SDValue &Align);
- bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset,
+ bool SelectAddrModePC(SDValue N, SDValue &Offset,
SDValue &Label);
- bool SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Offset);
- bool SelectThumbAddrModeRI5(SDNode *Op, SDValue N, unsigned Scale,
+ bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectThumbAddrModeRI5(SDValue N, unsigned Scale,
SDValue &Base, SDValue &OffImm,
SDValue &Offset);
- bool SelectThumbAddrModeS1(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeS1(SDValue N, SDValue &Base,
SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeS2(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeS2(SDValue N, SDValue &Base,
SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeS4(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectThumbAddrModeS4(SDValue N, SDValue &Base,
SDValue &OffImm, SDValue &Offset);
- bool SelectThumbAddrModeSP(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm);
+ bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
- bool SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
+ bool SelectT2ShifterOperandReg(SDValue N,
SDValue &BaseReg, SDValue &Opc);
- bool SelectT2AddrModeImm12(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm);
- bool SelectT2AddrModeImm8(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
+ bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
- bool SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &OffImm);
- bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
+ bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
+ inline bool Pred_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ }
+
+ inline bool Pred_t2_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+ }
+
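Both predicates test ARM's modified-immediate encodings: a value is a so_imm when it is an 8-bit constant rotated right by an even amount (the Thumb-2 rules differ slightly). A self-contained checker equivalent in spirit to the validity half of getSOImmVal (the real helper also returns the chosen encoding):

    #include <cstdint>
    // True if V is an 8-bit value rotated right by an even amount.
    static bool isSOImm(uint32_t V) {
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        // Rotate left by Rot to undo a rotate-right of the same amount.
        uint32_t R = (V << Rot) | (V >> ((32 - Rot) & 31));
        if (R <= 0xFF) return true;
      }
      return false;
    }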
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
@@ -143,10 +174,9 @@
/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
/// be 2, 3 or 4. The opcode arrays specify the instructions used for
- /// load/store of D registers and even subregs and odd subregs of Q registers.
+ /// load/store of D registers and Q registers.
SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned NumVecs,
- unsigned *DOpcodes, unsigned *QOpcodes0,
- unsigned *QOpcodes1);
+ unsigned *DOpcodes, unsigned *QOpcodes);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -164,10 +194,10 @@
SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
- SDNode *SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
- SDNode *SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+ SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -188,10 +218,6 @@
SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
-
- // Form sequences of 8 consecutive D registers.
- SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
- SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
}
@@ -220,8 +246,7 @@
}
-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
- SDValue N,
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
@@ -247,8 +272,54 @@
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode2(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &Offset,
+bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
+ SDValue &Base,
+ SDValue &OffImm) {
+ // Match simple R + imm12 operands.
+
+ // Base only.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+ if (N.getOpcode() == ISD::FrameIndex) {
+ // Match frame index...
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ } else if (N.getOpcode() == ARMISD::Wrapper &&
+ !(Subtarget->useMovt() &&
+ N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+ Base = N.getOperand(0);
+ } else
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if (N.getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+ return true;
+ }
+ }
+
+ // Base only.
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
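For reference, this matcher covers plain base-plus-constant addresses whose offset fits in 12 unsigned bits (0..4095), e.g. a fixed field access (illustration only):

    struct S { char Pad[64]; int F; };
    int get(S *P) { return P->F; }  // address = P + 64 -> ldr r0, [r0, #64]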
+
+bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::MUL) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -273,6 +344,93 @@
}
}
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB)
+ return false;
+
+ // Leave simple R +/- imm12 operands for LDRi12
+ if (N.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+ if ((RHSC >= 0 && RHSC < 0x1000) ||
+ (RHSC < 0 && RHSC > -0x1000)) // 12 bits.
+ return false;
+ }
+ }
+
+ // Otherwise this is R +/- [possibly shifted] R.
+ ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1));
+ unsigned ShAmt = 0;
+
+ Base = N.getOperand(0);
+ Offset = N.getOperand(1);
+
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(1).getOperand(0);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+
+ // Try matching (R shl C) + (R).
+ if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) {
+ ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0));
+ if (ShOpcVal != ARM_AM::no_shift) {
+ // Check to see if the RHS of the shift is a constant; if not, we can't
+ // fold it.
+ if (ConstantSDNode *Sh =
+ dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
+ ShAmt = Sh->getZExtValue();
+ Offset = N.getOperand(0).getOperand(0);
+ Base = N.getOperand(1);
+ } else {
+ ShOpcVal = ARM_AM::no_shift;
+ }
+ }
+ }
+
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+ MVT::i32);
+ return true;
+}
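The shapes left to this routine are base-plus-(possibly shifted)-register addresses; an indexed array access is the canonical source pattern (illustration only):

    // address = Base + (Idx << 2): register plus shifted register.
    int at(int *Base, int Idx) { return Base[Idx]; }
    // conceptually: ldr r0, [r0, r1, lsl #2]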
+
+AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
+ SDValue &Base,
+ SDValue &Offset,
+ SDValue &Opc) {
+ if (N.getOpcode() == ISD::MUL) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ // X * [3,5,9] -> X + X * [2,4,8] etc.
+ int RHSC = (int)RHS->getZExtValue();
+ if (RHSC & 1) {
+ RHSC = RHSC & ~1;
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (RHSC < 0) {
+ AddSub = ARM_AM::sub;
+ RHSC = -RHSC;
+ }
+ if (isPowerOf2_32(RHSC)) {
+ unsigned ShAmt = Log2_32(RHSC);
+ Base = Offset = N.getOperand(0);
+ Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
+ ARM_AM::lsl),
+ MVT::i32);
+ return AM2_SHOP;
+ }
+ }
+ }
+ }
+
if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
Base = N;
if (N.getOpcode() == ISD::FrameIndex) {
@@ -287,11 +445,11 @@
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
ARM_AM::no_shift),
MVT::i32);
- return true;
+ return AM2_BASE;
}
// Match simple R +/- imm12 operands.
- if (N.getOpcode() == ISD::ADD)
+ if (N.getOpcode() == ISD::ADD) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
if ((RHSC >= 0 && RHSC < 0x1000) ||
@@ -311,9 +469,10 @@
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
ARM_AM::no_shift),
MVT::i32);
- return true;
+ return AM2_BASE;
}
}
+ }
// Otherwise this is R +/- [possibly shifted] R.
ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub;
@@ -354,7 +513,7 @@
Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
MVT::i32);
- return true;
+ return AM2_SHOP;
}
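The MUL case at the top of this worker relies on X*(2^n + 1) decomposing into the same register added to a shifted copy of itself, which the AM2 shifter-op form encodes directly (negative multipliers take the sub form). A worked instance (sketch of the arithmetic, not compiler code):

    // X*9 = X + (X << 3); likewise X*3 = X + (X << 1), X*5 = X + (X << 2).
    // Base and Offset are the same register; the shift amount rides in the
    // AM2 opcode, so the whole multiply folds into the address.
    static unsigned times9(unsigned X) { return X + (X << 3); }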
bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N,
@@ -396,7 +555,7 @@
}
-bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
SDValue &Base, SDValue &Offset,
SDValue &Opc) {
if (N.getOpcode() == ISD::SUB) {
@@ -468,14 +627,13 @@
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
- SDValue &Addr, SDValue &Mode) {
+bool ARMDAGToDAGISel::SelectAddrMode4(SDValue N, SDValue &Addr, SDValue &Mode) {
Addr = N;
- Mode = CurDAG->getTargetConstant(0, MVT::i32);
+ Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
SDValue &Base, SDValue &Offset) {
if (N.getOpcode() != ISD::ADD) {
Base = N;
@@ -523,15 +681,14 @@
return true;
}
-bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N,
- SDValue &Addr, SDValue &Align) {
+bool ARMDAGToDAGISel::SelectAddrMode6(SDValue N, SDValue &Addr, SDValue &Align){
Addr = N;
// Default to no alignment.
Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
-bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
Offset = N.getOperand(0);
@@ -543,7 +700,7 @@
return false;
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
SDValue &Base, SDValue &Offset){
// FIXME dl should come from the parent load or store, not the address
if (N.getOpcode() != ISD::ADD) {
@@ -561,12 +718,12 @@
}
bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDNode *Op, SDValue N,
+ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue N,
unsigned Scale, SDValue &Base,
SDValue &OffImm, SDValue &Offset) {
if (Scale == 4) {
SDValue TmpBase, TmpOffImm;
- if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm))
+ if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
return false; // We want to select tLDRspi / tSTRspi instead.
if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
@@ -617,26 +774,26 @@
return true;
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue N,
SDValue &Base, SDValue &OffImm,
SDValue &Offset) {
- return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset);
+ return SelectThumbAddrModeRI5(N, 1, Base, OffImm, Offset);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue N,
SDValue &Base, SDValue &OffImm,
SDValue &Offset) {
- return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset);
+ return SelectThumbAddrModeRI5(N, 2, Base, OffImm, Offset);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue N,
SDValue &Base, SDValue &OffImm,
SDValue &Offset) {
- return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset);
+ return SelectThumbAddrModeRI5(N, 4, Base, OffImm, Offset);
}
-bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm) {
+bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
+ SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
@@ -671,8 +828,7 @@
return false;
}
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
- SDValue &BaseReg,
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
SDValue &Opc) {
if (DisableShifterOp)
return false;
@@ -694,7 +850,7 @@
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R + imm12 operands.
@@ -719,7 +875,7 @@
}
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- if (SelectT2AddrModeImm8(Op, N, Base, OffImm))
+ if (SelectT2AddrModeImm8(N, Base, OffImm))
// Let t2LDRi8 handle (R - imm8).
return false;
@@ -744,7 +900,7 @@
return true;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
SDValue &Base, SDValue &OffImm) {
// Match simple R - imm8 operands.
if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::SUB) {
@@ -787,35 +943,7 @@
return false;
}
-bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
- SDValue &Base, SDValue &OffImm) {
- if (N.getOpcode() == ISD::ADD) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- // 8 bits.
- if (((RHSC & 0x3) == 0) &&
- ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) {
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
- return true;
- }
- }
- } else if (N.getOpcode() == ISD::SUB) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int RHSC = (int)RHS->getZExtValue();
- // 8 bits.
- if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) {
- Base = N.getOperand(0);
- OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
- return true;
- }
- }
- }
-
- return false;
-}
-
-bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDNode *Op, SDValue N,
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
@@ -1035,37 +1163,22 @@
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
}
-/// OctoDRegs - Form 8 consecutive D registers.
-///
-SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
- SDValue V2, SDValue V3,
- SDValue V4, SDValue V5,
- SDValue V6, SDValue V7) {
- DebugLoc dl = V0.getNode()->getDebugLoc();
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
- SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
- SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
- SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
- SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
- SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
- const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
- V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
-}
-
-/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
-/// for a 64-bit subregister of the vector.
-static EVT GetNEONSubregVT(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("unhandled NEON type");
- case MVT::v16i8: return MVT::v8i8;
- case MVT::v8i16: return MVT::v4i16;
- case MVT::v4f32: return MVT::v2f32;
- case MVT::v4i32: return MVT::v2i32;
- case MVT::v2i64: return MVT::v1i64;
- }
+/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
+/// of a NEON VLD or VST instruction. The supported values depend on the
+/// number of registers being loaded.
+static unsigned GetVLDSTAlign(SDNode *N, unsigned NumVecs, bool is64BitVector) {
+ unsigned NumRegs = NumVecs;
+ if (!is64BitVector && NumVecs < 3)
+ NumRegs *= 2;
+
+ unsigned Alignment = cast<MemIntrinsicSDNode>(N)->getAlignment();
+ if (Alignment >= 32 && NumRegs == 4)
+ return 32;
+ if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+ return 16;
+ if (Alignment >= 8)
+ return 8;
+ return 0;
}
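A standalone mirror of the table for experimentation (same rules as the function above; Align is the memory operand's alignment in bytes):

    static unsigned vldstAlign(unsigned Align, unsigned NumVecs, bool is64Bit) {
      unsigned NumRegs = NumVecs;
      if (!is64Bit && NumVecs < 3)
        NumRegs *= 2;               // Q vectors occupy two D registers each
      if (Align >= 32 && NumRegs == 4) return 32;
      if (Align >= 16 && (NumRegs == 2 || NumRegs == 4)) return 16;
      if (Align >= 8) return 8;
      return 0;                     // 0 encodes "no alignment guarantee"
    }
    // e.g. vldstAlign(32, 2, false) == 32 (VLD2 of Q regs spans 4 D regs),
    //      vldstAlign(32, 3, true)  == 8  (3 D regs never claim more than 8).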
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
@@ -1075,13 +1188,16 @@
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ if (!SelectAddrMode6(N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
bool is64BitVector = VT.is64BitVector();
+ unsigned Alignment = GetVLDSTAlign(N, NumVecs, is64BitVector);
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld type");
@@ -1101,110 +1217,79 @@
break;
}
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+
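The result type built here is the super-register tuple the pseudo-instruction defines: vld3 is rounded up to a quad tuple (presumably because there is no 3-register sequence class), and Q vectors double the element count since each occupies two D registers. Distilled (sketch):

    // Number of i64 elements in the VLD super-register type.
    static unsigned resTyElts(unsigned NumVecs, bool is64Bit) {
      unsigned Elts = (NumVecs == 3) ? 4 : NumVecs; // round vld3 up to 4
      return is64Bit ? Elts : Elts * 2;             // Q regs = two D regs
    }
    // e.g. a vld3 of 128-bit vectors gets v8i64: resTyElts(3, false) == 8.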
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(NumVecs, VT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (NumVecs < 2)
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
return VLd;
- SDValue RegSeq;
- SDValue V0 = SDValue(VLd, 0);
- SDValue V1 = SDValue(VLd, 1);
-
- // Form a REG_SEQUENCE to force register allocation.
- if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- else {
- SDValue V2 = SDValue(VLd, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLd, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
+ SuperReg = SDValue(VLd, 0);
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
- dl, VT, RegSeq);
+ dl, VT, SuperReg);
ReplaceUses(SDValue(N, Vec), D);
}
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
return NULL;
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
// Quad registers are directly supported for VLD1 and VLD2,
// loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(2 * NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 2 * NumVecs);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
+ return VLd;
+
+ SuperReg = SDValue(VLd, 0);
+ Chain = SDValue(VLd, 1);
- // Combine the even and odd subregs to produce the result.
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MemAddr.getValueType());
- ResTys.push_back(MVT::Other);
+ EVT AddrTy = MemAddr.getValueType();
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
- Chain = SDValue(VLdA, NumVecs+1);
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
- Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
- Chain = SDValue(VLdB, NumVecs+1);
-
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
+ const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
+ Pred, Reg0, Chain };
+ SDNode *VLdB =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
+ SuperReg = SDValue(VLdB, 0);
+ Chain = SDValue(VLdB, 2);
+ }
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
+ // Extract out the Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, SuperReg);
+ ReplaceUses(SDValue(N, Vec), Q);
}
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
@@ -1217,13 +1302,16 @@
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ if (!SelectAddrMode6(N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
EVT VT = N->getOperand(3).getValueType();
bool is64BitVector = VT.is64BitVector();
+ unsigned Alignment = GetVLDSTAlign(N, NumVecs, is64BitVector);
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
+
unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vst type");
@@ -1246,12 +1334,14 @@
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (is64BitVector) {
- if (NumVecs >= 2) {
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1261,118 +1351,68 @@
RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
else {
SDValue V2 = N->getOperand(2+3);
- // If it's a vld3, form a quad D-register and leave the last part as
+ // If it's a vst3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(RegSeq);
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2,
- // storing pairs of D regs.
+ // Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (NumVecs == 2) {
- // First extract the pair of Q registers.
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ // Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
-
- // Form a QQ register.
- SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
- QQ));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3)));
- }
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
+ Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(3+3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
+ Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
+ Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1380,13 +1420,12 @@
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned NumVecs, unsigned *DOpcodes,
- unsigned *QOpcodes0,
- unsigned *QOpcodes1) {
+ unsigned *QOpcodes) {
assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align))
+ if (!SelectAddrMode6(N->getOperand(2), MemAddr, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1395,14 +1434,15 @@
EVT VT = IsLoad ? N->getValueType(0) : N->getOperand(3).getValueType();
bool is64BitVector = VT.is64BitVector();
- // Quad registers are handled by load/store of subregs. Find the subreg info.
- unsigned NumElts = 0;
- bool Even = false;
- EVT RegVT = VT;
- if (!is64BitVector) {
- RegVT = GetNEONSubregVT(VT);
- NumElts = RegVT.getVectorNumElements();
- Even = Lane < NumElts;
+ if (NumVecs != 3) {
+ unsigned Alignment = cast<MemIntrinsicSDNode>(N)->getAlignment();
+ unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ if (Alignment > NumBytes)
+ Alignment = NumBytes;
+ // Alignment must be a power of two; make sure of that.
+ Alignment = (Alignment & -Alignment);
+ if (Alignment > 1)
+ Align = CurDAG->getTargetConstant(Alignment, MVT::i32);
}
unsigned OpcodeIndex;
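The clamp in the hunk above first caps the alignment at the access size, then forces it to a power of two with the lowest-set-bit identity: V & -V isolates the least significant set bit, i.e. the largest power of two dividing V. A sketch:

    #include <cstdint>
    // Largest power of two dividing V (0 maps to 0).
    static uint32_t lowestSetBit(uint32_t V) { return V & (0u - V); }
    // e.g. lowestSetBit(12) == 4, lowestSetBit(8) == 8, lowestSetBit(7) == 1.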
@@ -1422,121 +1462,59 @@
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
- unsigned Opc = 0;
- if (is64BitVector) {
- Opc = DOpcodes[OpcodeIndex];
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
+ unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes[OpcodeIndex]);
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
+ SDValue SuperReg;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ if (is64BitVector)
+ SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else
+ SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- // Check if this is loading the even or odd subreg of a Q register.
- if (Lane < NumElts) {
- Opc = QOpcodes0[OpcodeIndex];
- } else {
- Lane -= NumElts;
- Opc = QOpcodes1[OpcodeIndex];
- }
-
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- }
-
- // Extract the subregs of the input vector.
- unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
- RegSeq));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ if (is64BitVector)
+ SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ else
+ SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
+ Ops.push_back(SuperReg);
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
Ops.push_back(Reg0);
Ops.push_back(Chain);
if (!IsLoad)
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6);
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
-
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- if (is64BitVector) {
- SDValue V0 = SDValue(VLdLn, 0);
- SDValue V1 = SDValue(VLdLn, 1);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = SDValue(VLdLn, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLdLn, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
- } else {
- // For 128-bit vectors, take the 64-bit results of the load and insert
- // them as subregs into the result.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- if (Even) {
- V[i] = SDValue(VLdLn, Vec);
- V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- } else {
- V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- V[i+1] = SDValue(VLdLn, Vec);
- }
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 7);
- if (NumVecs == 2)
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
- else
- RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
- }
+ EVT ResTy;
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other,
+ Ops.data(), 7);
+ SuperReg = SDValue(VLdLn, 0);
+ Chain = SDValue(VLdLn, 1);
+ // Extract the subregisters.
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
}
@@ -1555,7 +1533,7 @@
RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
else {
SDValue V2 = N->getOperand(FirstTblReg + 2);
- // If it's a vtbl3, form a quad D-register and leave the last part as
+ // If it's a vtbl3, form a quad D-register and leave the last part as
// an undef.
SDValue V3 = (NumVecs == 3)
? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
@@ -1563,17 +1541,10 @@
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
- // Now extract the D registers back out.
SmallVector<SDValue, 6> Ops;
if (IsExt)
Ops.push_back(N->getOperand(1));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq));
-
+ Ops.push_back(RegSeq);
Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
Ops.push_back(getAL(CurDAG)); // predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
@@ -1643,7 +1614,7 @@
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
SDValue CPTmp0;
SDValue CPTmp1;
- if (SelectT2ShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1)) {
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
unsigned SOVal = cast<ConstantSDNode>(CPTmp1)->getZExtValue();
unsigned SOShOp = ARM_AM::getSORegShOp(SOVal);
unsigned Opc = 0;
@@ -1671,7 +1642,7 @@
SDValue CPTmp0;
SDValue CPTmp1;
SDValue CPTmp2;
- if (SelectShifterOperandReg(N, TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag };
return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7);
@@ -1680,35 +1651,39 @@
}
SDNode *ARMDAGToDAGISel::
-SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (!T)
return 0;
- if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = Pred_t2_so_imm(TrueVal.getNode());
+ if (isSoImm || TrueImm <= 0xffff) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N,
- ARM::t2MOVCCi, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, (isSoImm ? ARM::t2MOVCCi : ARM::t2MOVCCi16),
+ MVT::i32, Ops, 5);
}
return 0;
}
SDNode *ARMDAGToDAGISel::
-SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
+SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) {
ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
if (!T)
return 0;
- if (Predicate_so_imm(TrueVal.getNode())) {
- SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
+ unsigned TrueImm = T->getZExtValue();
+ bool isSoImm = Pred_so_imm(TrueVal.getNode());
+ if (isSoImm || (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff)) {
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
- return CurDAG->SelectNodeTo(N,
- ARM::MOVCCi, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, (isSoImm ? ARM::MOVCCi : ARM::MOVCCi16),
+ MVT::i32, Ops, 5);
}
return 0;
}
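Both routines now accept a second class of constants: besides shifter-operand immediates, any value that fits in 16 bits can be materialized with the MOVW-style conditional moves (t2MOVCCi16 on Thumb-2; MOVCCi16 on ARM only when the subtarget has v6T2). The combined test, distilled (sketch):

    // Can this constant be a single predicated move?
    static bool cmovImmOK(bool isSoImm, bool hasV6T2, unsigned Imm) {
      return isSoImm || (hasV6T2 && Imm <= 0xffff); // so_imm, or movw-able
    }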
@@ -1751,24 +1726,24 @@
}
// Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
// (imm:i32):$cc)
// Emits: (MOVCCi:i32 GPR:i32:$false,
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
// Pattern complexity = 10 cost = 1 size = 0
if (Subtarget->isThumb()) {
- SDNode *Res = SelectT2CMOVSoImmOp(N, FalseVal, TrueVal,
+ SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectT2CMOVSoImmOp(N, TrueVal, FalseVal,
+ Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
} else {
- SDNode *Res = SelectARMCMOVSoImmOp(N, FalseVal, TrueVal,
+ SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
if (!Res)
- Res = SelectARMCMOVSoImmOp(N, TrueVal, FalseVal,
+ Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal,
ARMCC::getOppositeCondition(CCVal), CCR, InFlag);
if (Res)
return Res;
@@ -1862,14 +1837,13 @@
} else {
SDValue Ops[] = {
CPIdx,
- CurDAG->getRegister(0, MVT::i32),
CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32),
CurDAG->getEntryNode()
};
ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
- Ops, 6);
+ Ops, 5);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
return NULL;
@@ -2024,43 +1998,6 @@
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
-
- // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
- MVT::v2f64, MVT::Other, Ops, 5);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
- // Other cases are autogenerated.
- break;
- }
- case ISD::STORE: {
- // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
- AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
// Other cases are autogenerated.
break;
}
@@ -2217,121 +2154,123 @@
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
- ARM::VLD1q32, ARM::VLD1q64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD1q64 };
- unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD1d64T };
- unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
- ARM::VLD3q16_UPD,
- ARM::VLD3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
- ARM::VLD3q16odd_UPD,
- ARM::VLD3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD1d64Q };
- unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
- ARM::VLD4q16_UPD,
- ARM::VLD4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
- ARM::VLD4q16odd_UPD,
- ARM::VLD4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld2lane: {
- unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd };
- return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo,
+ ARM::VLD2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld3lane: {
- unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd };
- return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo,
+ ARM::VLD3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vld4lane: {
- unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 };
- unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 };
- unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd };
- return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo,
+ ARM::VLD4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo };
+ return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST1d64T };
- unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
- ARM::VST3q16_UPD,
- ARM::VST3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
- ARM::VST3q16odd_UPD,
- ARM::VST3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST1d64Q };
- unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
- ARM::VST4q16_UPD,
- ARM::VST4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
- ARM::VST4q16odd_UPD,
- ARM::VST4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst2lane: {
- unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 };
- unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 };
- unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd };
- return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo,
+ ARM::VST2LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst3lane: {
- unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 };
- unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 };
- unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd };
- return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo,
+ ARM::VST3LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes);
}
case Intrinsic::arm_neon_vst4lane: {
- unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 };
- unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 };
- unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd };
- return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo,
+ ARM::VST4LNd32Pseudo };
+ unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo };
+ return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes);
}
}
break;
@@ -2344,18 +2283,18 @@
break;
case Intrinsic::arm_neon_vtbl2:
- return SelectVTBL(N, false, 2, ARM::VTBL2);
+ return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
case Intrinsic::arm_neon_vtbl3:
- return SelectVTBL(N, false, 3, ARM::VTBL3);
+ return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
case Intrinsic::arm_neon_vtbl4:
- return SelectVTBL(N, false, 4, ARM::VTBL4);
+ return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
case Intrinsic::arm_neon_vtbx2:
- return SelectVTBL(N, true, 2, ARM::VTBX2);
+ return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
case Intrinsic::arm_neon_vtbx3:
- return SelectVTBL(N, true, 3, ARM::VTBX3);
+ return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
case Intrinsic::arm_neon_vtbx4:
- return SelectVTBL(N, true, 4, ARM::VTBX4);
+ return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
}
break;
}
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.cpp Tue Oct 26 19:48:03 2010
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
@@ -28,6 +29,7 @@
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -55,13 +57,6 @@
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(true));
-
-// This option should go away when Machine LICM is smart enough to hoist a
-// reg-to-reg VDUP.
-static cl::opt<bool>
-EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
- cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
cl::init(false));
static cl::opt<bool>
@@ -74,28 +69,6 @@
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-static cl::opt<bool>
-EnableARMCodePlacement("arm-code-placement", cl::Hidden,
- cl::desc("Enable code placement pass for ARM"),
- cl::init(false));
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT PromotedBitwiseVT) {
if (VT != PromotedLdStVT) {
@@ -129,7 +102,10 @@
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
}
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -174,6 +150,7 @@
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getRegisterInfo();
+ Itins = TM.getInstrItineraryData();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@@ -257,13 +234,157 @@
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- // Libcalls should use the AAPCS base standard ABI, even if hard float
- // is in effect, as per the ARM RTABI specification, section 4.1.2.
if (Subtarget->isAAPCS_ABI()) {
- for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
- setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
- CallingConv::ARM_AAPCS);
- }
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
+ setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
+ setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
+ setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
+ setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
+ setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
+ setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
+ setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
+ setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
+ setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
+ setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
+ setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
+ setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
+ setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
+ setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
+ setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
+ setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+ setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
+ setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+ setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+ setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
+ setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+ setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
+ setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
+ setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
+ setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
+ setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
}
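A note on the comparison helpers above: the AEABI compare functions return a nonzero value when the tested relation holds, so setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE) reads as "the OEQ result is the helper's return value compared != 0", and pointing both OEQ and UNE at __aeabi_dcmpeq with opposite condition codes reuses one helper for two predicates. A minimal sketch of how such a call is consumed (__aeabi_dcmpeq is the real RTABI symbol named above; the wrapper functions are illustrative):

    extern "C" int __aeabi_dcmpeq(double, double); // RTABI: nonzero iff equal

    bool ordered_equal(double a, double b) {
      return __aeabi_dcmpeq(a, b) != 0;  // OEQ: tested with SETNE against 0
    }
    bool unordered_or_not_equal(double a, double b) {
      return __aeabi_dcmpeq(a, b) == 0;  // UNE: tested with SETEQ against 0
    }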
if (Subtarget->isThumb1Only())
@@ -272,7 +393,8 @@
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
@@ -318,9 +440,14 @@
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
@@ -332,6 +459,7 @@
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
}
computeRegisterProperties();
@@ -418,12 +546,10 @@
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
- // use the default expansion.
- bool canHandleAtomics =
- (Subtarget->hasV7Ops() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
- if (canHandleAtomics) {
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -474,24 +600,27 @@
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
}
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
@@ -554,8 +683,7 @@
// are at least 4 bytes aligned.
setMinStackArgumentAlignment(4);
- if (EnableARMCodePlacement)
- benefitFromCodePlacementOpt = true;
+ benefitFromCodePlacementOpt = true;
}
std::pair<const TargetRegisterClass*, uint8_t>
@@ -623,14 +751,15 @@
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
- case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
- case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
-
+
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
@@ -677,6 +806,8 @@
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
@@ -737,8 +868,7 @@
if (TID.mayLoad())
return Sched::Latency;
- const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
- if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+ if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2)
return Sched::Latency;
return Sched::RegPressure;
}
@@ -746,14 +876,15 @@
unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
default:
return 0;
case ARM::tGPRRegClassID:
- return 5 - FPDiff;
- case ARM::GPRRegClassID:
- return 10 - FPDiff - (Subtarget->isR9Reserved() ? 1 : 0);
+ return RegInfo->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
case ARM::DPRRegClassID:
return 32 - 10;
@@ -816,136 +947,6 @@
#include "ARMGenCallingConv.inc"
-// APCS f64 is in register pairs, possibly split to stack
-static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- CCState &State, bool CanFail) {
- static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
- // Try to get the first register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else {
- // For the 2nd half of a v2f64, do not fail.
- if (CanFail)
- return false;
-
- // Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 4),
- LocVT, LocInfo));
- return true;
- }
-
- // Try to get the second register.
- if (unsigned Reg = State.AllocateReg(RegList, 4))
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- else
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(4, 4),
- LocVT, LocInfo));
- return true;
-}
-
-static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
- return false;
- if (LocVT == MVT::v2f64 &&
- !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
- return false;
- return true; // we handled it
-}
-
-// AAPCS f64 is in aligned register pairs
-static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- CCState &State, bool CanFail) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
- static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
-
- unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
- if (Reg == 0) {
- // For the 2nd half of a v2f64, do not just fail.
- if (CanFail)
- return false;
-
- // Put the whole thing on the stack.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
- State.AllocateStack(8, 8),
- LocVT, LocInfo));
- return true;
- }
-
- unsigned i;
- for (i = 0; i < 2; ++i)
- if (HiRegList[i] == Reg)
- break;
-
- unsigned T = State.AllocateReg(LoRegList[i]);
- (void)T;
- assert(T == LoRegList[i] && "Could not allocate register");
-
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- LocVT, LocInfo));
- return true;
-}
-
-static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
- return false;
- if (LocVT == MVT::v2f64 &&
- !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
- return false;
- return true; // we handled it
-}
-
-static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
-
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
- if (Reg == 0)
- return false; // we didn't handle it
-
- unsigned i;
- for (i = 0; i < 2; ++i)
- if (HiRegList[i] == Reg)
- break;
-
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
- LocVT, LocInfo));
- return true;
-}
-
-static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
- return false;
- if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
- return false;
- return true; // we handled it
-}
-
-static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
- State);
-}
-
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
@@ -954,23 +955,29 @@
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
- case CallingConv::C:
case CallingConv::Fast:
+ if (Subtarget->hasVFP2() && !isVarArg) {
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
+ // For AAPCS ABI targets, just use the VFP variant of the calling convention.
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ }
+ // Fallthrough
+ case CallingConv::C: {
// Use target triple & subtarget features to do actual dispatch.
- if (Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
- else
- return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
- } else
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ if (!Subtarget->isAAPCS_ABI())
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ else if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard && !isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ }
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
case CallingConv::ARM_AAPCS:
- return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
- return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
}
}
@@ -1060,7 +1067,7 @@
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
- NULL, 0, NULL, 0);
+ MachinePointerInfo(0), MachinePointerInfo(0));
}
/// LowerMemOpCallTo - Store the argument to the stack.
@@ -1073,11 +1080,11 @@
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- if (Flags.isByVal()) {
+ if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
- }
+
return DAG.getStore(Chain, dl, Arg, PtrOff,
- PseudoSourceValue::getStack(), LocMemOffset,
+ MachinePointerInfo::getStack(LocMemOffset),
false, false, 0);
}
@@ -1285,7 +1292,7 @@
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1299,7 +1306,7 @@
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
@@ -1321,13 +1328,19 @@
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
- } else
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ } else {
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ unsigned OpFlags = 0;
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ }
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
bool isStub = Subtarget->isTargetDarwin() &&
@@ -1343,13 +1356,19 @@
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
- } else
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
+ } else {
+ unsigned OpFlags = 0;
+ // On ELF targets for PIC code, direct calls should go through the PLT
+ if (Subtarget->isTargetELF() &&
+ getTargetMachine().getRelocationModel() == Reloc::PIC_)
+ OpFlags = ARMII::MO_PLT;
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
+ }
}
// FIXME: handle tail calls differently.
@@ -1477,7 +1496,7 @@
// LR. This means if we need to reload LR, it takes extra instructions,
// which outweighs the value of the tail call; but here we don't know yet
// whether LR is going to be used. Probably the right approach is to
- // generate the tail call here and turn it back into CALL/RET in
+ // generate the tail call here and turn it back into CALL/RET in
// emitEpilogue if LR is used.
if (Subtarget->isThumb1Only())
return false;
@@ -1570,7 +1589,7 @@
if (!VA.isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
- return false;
+ return false;
if (RegVT == MVT::v2f64) {
if (!ArgLocs[++i].isRegLoc())
return false;
@@ -1727,7 +1746,7 @@
}
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
if (RelocM == Reloc::Static)
return Result;
@@ -1751,7 +1770,7 @@
SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue Chain = Argument.getValue(1);
@@ -1798,7 +1817,7 @@
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
Chain = Offset.getValue(1);
@@ -1806,7 +1825,7 @@
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
} else {
// local exec model
@@ -1814,7 +1833,7 @@
Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
}
@@ -1851,15 +1870,14 @@
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0,
- false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, 0);
return Result;
} else {
// If we have T2 ops, we can materialize the address directly via movt/movw
@@ -1871,7 +1889,7 @@
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
}
}
@@ -1899,7 +1917,7 @@
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue Chain = Result.getValue(1);
@@ -1909,8 +1927,7 @@
}
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
- Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- PseudoSourceValue::getGOT(), 0,
+ Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
false, false, 0);
return Result;
@@ -1932,13 +1949,21 @@
SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+ const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -1981,7 +2006,7 @@
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, 0);
if (RelocM == Reloc::PIC_) {
@@ -1998,17 +2023,19 @@
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- // v6 and v7 can both handle barriers directly, but need handled a bit
- // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
+ // Some subtargets with dmb and dsb instructions can handle barriers
+ // directly. Some ARMv6 CPUs can support them with the help of the mcr
+ // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
// never get here.
unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
- if (Subtarget->hasV7Ops())
+ if (Subtarget->hasDataBarrier())
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
- else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ else {
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
- assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return SDValue();
+ }
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -2021,8 +2048,8 @@
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
- false, false, 0);
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
}
SDValue
@@ -2039,7 +2066,7 @@
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -2050,7 +2077,7 @@
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
@@ -2104,7 +2131,7 @@
int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
@@ -2173,7 +2200,7 @@
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- PseudoSourceValue::getFixedStack(FI), 0,
+ MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
@@ -2199,7 +2226,7 @@
AFI->setVarArgsFrameIndex(
MFI->CreateFixedObject(VARegSaveSize,
ArgOffset + VARegSaveSize - VARegSize,
- true));
+ false));
SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
getPointerTy());
@@ -2215,8 +2242,8 @@
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
- 0, false, false, 0);
+ MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+ false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
@@ -2262,28 +2289,28 @@
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalICmpImmediate(C-1)) {
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalICmpImmediate(C-1)) {
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalICmpImmediate(C+1)) {
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
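The new guards prevent the adjusted immediate from wrapping: rewriting x < C as x <= C-1 is only sound when C-1 does not overflow, hence C != 0x80000000 (signed) and C != 0 (unsigned), and symmetrically C != 0x7fffffff / 0xffffffff for the C+1 rewrites. Distilled into standalone predicates (hypothetical helpers, mirroring the conditions above):

    #include <cstdint>

    bool canRewriteSignedLT(int32_t C)    { return C != INT32_MIN;  } // 0x80000000
    bool canRewriteUnsignedLT(uint32_t C) { return C != 0;          }
    bool canRewriteSignedLE(int32_t C)    { return C != INT32_MAX;  } // 0x7fffffff
    bool canRewriteUnsignedLE(uint32_t C) { return C != UINT32_MAX; } // 0xffffffff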
@@ -2320,6 +2347,52 @@
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Cond.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = Cond.getOperand(4);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
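The payoff of the CMOV fold: when the select condition is itself a 1/0 CMOV, typically materialized from a comparison, the select can reuse the original flags instead of re-testing the materialized boolean. At the source level the shape is roughly this (illustrative; the exact codegen depends on the surrounding IR):

    // The condition is materialized as (cmov 1, 0, lt) on the flags of a
    // compare; the fold turns the select into (cmov t, f, lt) on those flags.
    int pick(int a, int b, int t, int f) {
      bool c = a < b;
      return c ? t : f;
    }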
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -2383,8 +2456,7 @@
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
- Ld->getChain(), Ld->getBasePtr(),
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->getAlignment());
@@ -2403,7 +2475,7 @@
SDValue Ptr = Ld->getBasePtr();
RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
Ld->getChain(), Ptr,
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
Ld->getAlignment());
@@ -2413,7 +2485,7 @@
PtrType, Ptr, DAG.getConstant(4, PtrType));
RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
Ld->getChain(), NewPtr,
- Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+ Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
NewAlign);
return;
@@ -2538,14 +2610,14 @@
}
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0,
+ MachinePointerInfo::getJumpTable(),
false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0, false, false, 0);
+ MachinePointerInfo::getJumpTable(), false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
@@ -2617,7 +2689,7 @@
SDValue Offset = DAG.getConstant(4, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- NULL, 0, false, false, 0);
+ MachinePointerInfo(), false, false, 0);
}
// Return LR, which contains the return address. Mark it an implicit live-in.
@@ -2636,7 +2708,8 @@
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
false, false, 0);
return FrameAddr;
}
@@ -2764,6 +2837,24 @@
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+ // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
+ // so that the shift + and get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
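To see that the formula realizes the 0->1, 1->2, 2->3, 3->0 mapping: adding 1 << 22 increments the two-bit field at bits 23:22 modulo 4, any carry lands in bit 24 and is removed by the final mask, and no carry can enter bit 22 from below because the addend's low bits are zero. A scalar model (fltRoundsFromFPSCR is an illustrative name, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Scalar model of the lowered sequence: ((fpscr + (1u << 22)) >> 22) & 3.
    int fltRoundsFromFPSCR(uint32_t fpscr) {
      return static_cast<int>(((fpscr + (1u << 22)) >> 22) & 3);
    }

    int main() {
      for (uint32_t rmode = 0; rmode != 4; ++rmode) {
        uint32_t fpscr = rmode << 22;              // RMode lives in bits 23:22
        assert(fltRoundsFromFPSCR(fpscr) == static_cast<int>((rmode + 1) & 3));
      }
    }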
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -3080,6 +3171,11 @@
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3095,6 +3191,7 @@
ReverseVEXT = true;
}
+ if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
@@ -3120,13 +3217,16 @@
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
- if ((unsigned) M[i] !=
- (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
@@ -3142,8 +3242,8 @@
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + NumElts + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
return false;
}
return true;
@@ -3161,8 +3261,8 @@
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
return false;
}
return true;
@@ -3177,6 +3277,7 @@
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != 2 * i + WhichResult)
return false;
}
@@ -3202,7 +3303,8 @@
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
- if ((unsigned) M[i + j * Half] != Idx)
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
@@ -3225,8 +3327,8 @@
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx + NumElts)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
return false;
Idx += 1;
}
@@ -3251,8 +3353,8 @@
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
return false;
Idx += 1;
}
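The pattern running through all of these mask checks is the same: a shuffle index of -1 means UNDEF, and an UNDEF lane is compatible with whatever the instruction produces there, so the checks now skip such entries rather than rejecting the whole mask. Distilled (maskMatches is a hypothetical helper, not part of the patch):

    #include <vector>

    // An index of -1 (UNDEF) matches anything; only defined lanes constrain.
    static bool maskMatches(const std::vector<int> &M,
                            unsigned (*Expected)(unsigned)) {
      for (unsigned i = 0, e = M.size(); i != e; ++i) {
        if (M[i] < 0) continue;               // ignore UNDEF indices
        if (static_cast<unsigned>(M[i]) != Expected(i)) return false;
      }
      return true;
    }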
@@ -3286,7 +3388,7 @@
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
DebugLoc dl = Op.getDebugLoc();
@@ -3349,26 +3451,25 @@
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats. For f32 constant splats, reduce to
- // i32 and try again.
- if (usesOnlyOneValue && EltSize <= 32) {
- if (!isConstant)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
- if (VT.getVectorElementType().isFloatingPoint()) {
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
- Op.getOperand(i)));
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
- NumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- LowerBUILD_VECTOR(Val, DAG, ST));
- }
- SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
+ NumElts);
+ Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
- return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
}
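The f32 detour above bitcasts each element to i32 and re-lowers, so a constant float splat can still be materialized in one instruction, while a non-constant splat goes straight to VDUP. The non-constant case, expressed with NEON intrinsics (a sketch, assuming <arm_neon.h> is available):

    #include <arm_neon.h>

    // One VDUP from a scalar register; no per-lane inserts.
    float32x4_t splat4(float x) { return vdupq_n_f32(x); }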
// If all elements are constants and the case above didn't get hit, fall back
@@ -3377,12 +3478,6 @@
if (isConstant)
return SDValue();
- if (!EnableARMVDUPsplat) {
- // Use VDUP for non-constant splats.
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
- }
-
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
@@ -3666,6 +3761,50 @@
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
+/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
+/// an extending load, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLs;
+ } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLu;
+ } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ } else {
+ // Other vector multiplications are legal.
+ return Op;
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op1 = SkipExtension(N1, DAG);
+
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+}
+
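VMULL is NEON's widening multiply: each 64-bit operand's elements are sign- or zero-extended and multiplied into a 128-bit result, which is why the lowering strips the extensions and then asserts the remaining operands are 64-bit vectors. The matched shape, at the C level (illustrative; whether a compiler turns this loop into VMULL.S16 operations depends on its vectorizer):

    #include <cstdint>

    // Each product is formed at double width: sext(a[i]) * sext(b[i]).
    void widening_mul(const int16_t *a, const int16_t *b, int32_t *out) {
      for (int i = 0; i < 8; ++i)
        out[i] = static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i]);
    }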
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -3675,6 +3814,7 @@
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
@@ -3690,6 +3830,7 @@
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
@@ -3705,6 +3846,8 @@
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
}
return SDValue();
}
@@ -4082,78 +4225,6 @@
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
-
- case ARM::tANDsp:
- case ARM::tADDspr_:
- case ARM::tSUBspi_:
- case ARM::t2SUBrSPi_:
- case ARM::t2SUBrSPi12_:
- case ARM::t2SUBrSPs_: {
- MachineFunction *MF = BB->getParent();
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool DstIsDead = MI->getOperand(0).isDead();
- bool SrcIsKill = MI->getOperand(1).isKill();
-
- if (SrcReg != ARM::SP) {
- // Copy the source to SP from virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
- }
-
- unsigned OpOpc = 0;
- bool NeedPred = false, NeedCC = false, NeedOp3 = false;
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("Unexpected pseudo instruction!");
- case ARM::tANDsp:
- OpOpc = ARM::tAND;
- NeedPred = true;
- break;
- case ARM::tADDspr_:
- OpOpc = ARM::tADDspr;
- break;
- case ARM::tSUBspi_:
- OpOpc = ARM::tSUBspi;
- break;
- case ARM::t2SUBrSPi_:
- OpOpc = ARM::t2SUBrSPi;
- NeedPred = true; NeedCC = true;
- break;
- case ARM::t2SUBrSPi12_:
- OpOpc = ARM::t2SUBrSPi12;
- NeedPred = true;
- break;
- case ARM::t2SUBrSPs_:
- OpOpc = ARM::t2SUBrSPs;
- NeedPred = true; NeedCC = true; NeedOp3 = true;
- break;
- }
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
- if (OpOpc == ARM::tAND)
- AddDefaultT1CC(MIB);
- MIB.addReg(ARM::SP);
- MIB.addOperand(MI->getOperand(2));
- if (NeedOp3)
- MIB.addOperand(MI->getOperand(3));
- if (NeedPred)
- AddDefaultPred(MIB);
- if (NeedCC)
- AddDefaultCC(MIB);
-
- // Copy the result from SP to virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(ARM::SP);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
- }
}
}
@@ -4232,7 +4303,6 @@
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
-
return SDValue();
}
@@ -4276,10 +4346,6 @@
if (Subtarget->isThumb1Only())
return SDValue();
- if (DAG.getMachineFunction().
- getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- return SDValue();
-
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
@@ -4426,18 +4492,48 @@
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // fmrrd(fmdrr x, y) -> x,y
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // vmovrrd(vmovdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
return SDValue();
}
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+ // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Op0.getOpcode() == ISD::BIT_CONVERT)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::BIT_CONVERT)
+ Op1 = Op1.getOperand(0);
+ if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+ Op0.getNode() == Op1.getNode() &&
+ Op0.getResNo() == 0 && Op1.getResNo() == 1)
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ N->getValueType(0), Op0.getOperand(0));
+ return SDValue();
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) {
+ // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+ // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
+ // into a pair of GPRs, which is fine when the value is used as a scalar,
+ // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+ if (N->getNumOperands() == 2)
+ return PerformVMOVDRRCombine(N, DAG);
+
+ return SDValue();
+}
+
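The round-trip this combine removes, seen from the source level (an illustrative intrinsics example): an i64 scalar placed into a vector would otherwise be split into two GPRs by VMOVRRD and immediately reassembled by VMOVDRR / BUILD_VECTOR.

    #include <arm_neon.h>
    #include <cstdint>

    // With the combine, the value moves into a D register once.
    int64x1_t toVector(int64_t v) {
      return vcreate_s64(static_cast<uint64_t>(v));
    }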
/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
-static SDValue PerformVDUPLANECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue PerformVDUPLANECombine(SDNode *N, SelectionDAG &DAG) {
// If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
// redundant.
SDValue Op = N->getOperand(0);
@@ -4459,8 +4555,7 @@
if (EltSize > VT.getVectorElementType().getSizeInBits())
return SDValue();
- SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
- return DCI.CombineTo(N, Res, false);
+ return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
@@ -4842,7 +4937,9 @@
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
- case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI.DAG);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
@@ -4856,15 +4953,7 @@
}
bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
- if (!Subtarget->hasV6Ops())
- // Pre-v6 does not support unaligned mem access.
- return false;
-
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- // FIXME: This is pretty conservative. Should we provide cmdline option to
- // control the behaviour?
- if (!Subtarget->isTargetDarwin())
+ if (!Subtarget->allowsUnalignedMem())
return false;
switch (VT.getSimpleVT().SimpleTy) {
@@ -5078,7 +5167,7 @@
if (!Subtarget->isThumb())
return ARM_AM::getSOImmVal(Imm) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
return Imm >= 0 && Imm <= 255;
}
@@ -5599,3 +5688,63 @@
return ARM::getVFPf64Imm(Imm) != -1;
return false;
}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
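To make the conservative memVT sizing above concrete: a vld2 of two <4 x i32> vectors returns a value whose alloc size is 32 bytes, so NumElts = 32 / 8 = 4 and memVT becomes v4i64, covering the entire access. A minimal sketch of that arithmetic (helper name hypothetical):

  #include <cstdint>
  // Total allocation size in bytes -> number of i64 elements in memVT.
  uint64_t conservativeNumI64Elts(uint64_t AllocSizeBytes) {
    return AllocSizeBytes / 8;  // vld2 of two <4 x i32>: 32 bytes -> 4
  }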
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMISelLowering.h Tue Oct 26 19:48:03 2010
@@ -71,8 +71,9 @@
VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
- EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
- EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
+ EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
TC_RETURN, // Tail call return pseudo.
@@ -82,7 +83,7 @@
MEMBARRIER, // Memory barrier
SYNCBARRIER, // Memory sync barrier
-
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -143,6 +144,10 @@
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -284,6 +289,9 @@
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const;
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(EVT VT) const;
@@ -295,6 +303,8 @@
const TargetRegisterInfo *RegInfo;
+ const InstrItineraryData *Itins;
+
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
@@ -323,6 +333,7 @@
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -335,6 +346,7 @@
SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
@@ -342,6 +354,7 @@
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrFormats.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrFormats.td (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrFormats.td Tue Oct 26 19:48:03 2010
@@ -36,37 +36,38 @@
def LdStExFrm : Format<11>;
def ArithMiscFrm : Format<12>;
-def ExtFrm : Format<13>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
-def VFPUnaryFrm : Format<14>;
-def VFPBinaryFrm : Format<15>;
-def VFPConv1Frm : Format<16>;
-def VFPConv2Frm : Format<17>;
-def VFPConv3Frm : Format<18>;
-def VFPConv4Frm : Format<19>;
-def VFPConv5Frm : Format<20>;
-def VFPLdStFrm : Format<21>;
-def VFPLdStMulFrm : Format<22>;
-def VFPMiscFrm : Format<23>;
-
-def ThumbFrm : Format<24>;
-def MiscFrm : Format<25>;
-
-def NGetLnFrm : Format<26>;
-def NSetLnFrm : Format<27>;
-def NDupFrm : Format<28>;
-def NLdStFrm : Format<29>;
-def N1RegModImmFrm: Format<30>;
-def N2RegFrm : Format<31>;
-def NVCVTFrm : Format<32>;
-def NVDupLnFrm : Format<33>;
-def N2RegVShLFrm : Format<34>;
-def N2RegVShRFrm : Format<35>;
-def N3RegFrm : Format<36>;
-def N3RegVShFrm : Format<37>;
-def NVExtFrm : Format<38>;
-def NVMulSLFrm : Format<39>;
-def NVTBLFrm : Format<40>;
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
// Misc flags.
@@ -84,25 +85,26 @@
//
// Addressing mode.
-class AddrMode<bits<4> val> {
- bits<4> Value = val;
+class AddrMode<bits<5> val> {
+ bits<5> Value = val;
}
-def AddrModeNone : AddrMode<0>;
-def AddrMode1 : AddrMode<1>;
-def AddrMode2 : AddrMode<2>;
-def AddrMode3 : AddrMode<3>;
-def AddrMode4 : AddrMode<4>;
-def AddrMode5 : AddrMode<5>;
-def AddrMode6 : AddrMode<6>;
-def AddrModeT1_1 : AddrMode<7>;
-def AddrModeT1_2 : AddrMode<8>;
-def AddrModeT1_4 : AddrMode<9>;
-def AddrModeT1_s : AddrMode<10>;
-def AddrModeT2_i12: AddrMode<11>;
-def AddrModeT2_i8 : AddrMode<12>;
-def AddrModeT2_so : AddrMode<13>;
-def AddrModeT2_pc : AddrMode<14>;
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12 : AddrMode<16>;
// Instruction size.
class SizeFlagVal<bits<3> val> {
@@ -137,25 +139,36 @@
// ARM special operands.
//
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
(ops (i32 14), (i32 zero_reg))> {
let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
}
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+ string EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
}
// Same as cc_out except it defaults to setting CPSR.
def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+ string EncoderMethod = "getCCOutOpValue";
let PrintMethod = "printSBitModifierOperand";
}
// ARM special operands for disassembly only.
//
+def setend_op : Operand<i32> {
+ let PrintMethod = "printSetendOperand";
+}
def cps_opt : Operand<i32> {
let PrintMethod = "printCPSOptionOperand";
@@ -192,13 +205,13 @@
bit canXformTo16Bit = 0;
// The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
- let TSFlags{3-0} = AM.Value;
- let TSFlags{6-4} = SZ.Value;
- let TSFlags{8-7} = IndexModeBits;
- let TSFlags{14-9} = Form;
- let TSFlags{15} = isUnaryDataProc;
- let TSFlags{16} = canXformTo16Bit;
- let TSFlags{18-17} = D.Value;
+ let TSFlags{4-0} = AM.Value;
+ let TSFlags{7-5} = SZ.Value;
+ let TSFlags{9-8} = IndexModeBits;
+ let TSFlags{15-10} = Form;
+ let TSFlags{16} = isUnaryDataProc;
+ let TSFlags{17} = canXformTo16Bit;
+ let TSFlags{19-18} = D.Value;
let Constraints = cstr;
let Itinerary = itin;
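Widening AddrMode from four to five bits shifts every later TSFlags field up by one, which is what this hunk encodes. A sketch of accessors matching the new layout (the authoritative masks live in ARMBaseInstrInfo.h, as the comment says; these helper names are illustrative):

  #include <cstdint>
  unsigned getAddrMode (uint64_t TSFlags) { return  TSFlags        & 0x1f; } // bits 4-0
  unsigned getSizeFlag (uint64_t TSFlags) { return (TSFlags >> 5)  & 0x07; } // bits 7-5
  unsigned getIndexMode(uint64_t TSFlags) { return (TSFlags >> 8)  & 0x03; } // bits 9-8
  unsigned getForm     (uint64_t TSFlags) { return (TSFlags >> 10) & 0x3f; } // bits 15-10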
@@ -234,12 +247,15 @@
string opc, string asm, string cstr,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
+
// A few are not predicable
class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
@@ -254,17 +270,22 @@
list<Predicate> Predicates = [IsARM];
}
-// Same as I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ bits<4> p; // Predicate operand
+ bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+ let Inst{31-28} = p;
+ let Inst{20} = s;
+
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -322,16 +343,14 @@
: XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-
// Atomic load/store instructions
-
class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 1;
+ let Inst{20} = 1;
let Inst{11-0} = 0b111110011111;
}
class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -340,7 +359,7 @@
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 0;
+ let Inst{20} = 0;
let Inst{11-4} = 0b11111001;
}
@@ -350,21 +369,21 @@
: I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -377,10 +396,42 @@
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// loads
+
+// LDR/LDRB
+class AIldr1<bits<3> op, bit opc22, dag oops, dag iops, AddrMode am, Format f,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = op;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = opc22;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = 1;
+}
+// LDRH/LDRSB/LDRSH/LDRD
+class AIldr2<bits<4> op, bit opc22, bit opc20, dag oops, dag iops, AddrMode am,
+ Format f, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm,
+ "", pattern> {
+ let Inst{27-25} = 0b000;
+ let Inst{24} = 1; // 24 == P
+ // 23 == U
+ let Inst{22} = opc22;
+ let Inst{21} = 0; // 21 == W
+ let Inst{20} = opc20;
+
+ let Inst{7-4} = op;
+}
+
class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
@@ -389,7 +440,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -399,7 +450,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -409,7 +460,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -419,7 +470,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// stores
@@ -431,7 +482,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -441,7 +492,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -451,7 +502,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -461,7 +512,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed loads
@@ -473,7 +524,7 @@
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -483,7 +534,7 @@
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed stores
@@ -495,7 +546,7 @@
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -505,7 +556,7 @@
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed loads
@@ -517,7 +568,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -527,7 +578,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed stores
@@ -539,7 +590,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -549,7 +600,7 @@
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// addrmode3 instructions
@@ -893,24 +944,65 @@
}
// Most significant word multiply
-class AMul2I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
- let Inst{7-4} = 0b1001;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{7-4} = opc7_4;
let Inst{20} = 1;
let Inst{27-21} = opcod;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+// MSW multiply w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
}
// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
-class AMulxyI<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{4} = 0;
let Inst{7} = 1;
let Inst{20} = 0;
let Inst{27-21} = opcod;
+ let Inst{6-5} = bit6_5;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
}
// Extend instructions.
@@ -918,16 +1010,47 @@
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
opc, asm, "", pattern> {
+ // All AExtI instructions have Rd and Rm register operands.
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
let Inst{7-4} = 0b0111;
+ let Inst{9-8} = 0b00;
let Inst{27-20} = opcod;
}
// Misc Arithmetic instructions.
-class AMiscA1I<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+ opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{27-20} = opcod;
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rd;
+ let Inst{11-8} = 0b1111;
+ let Inst{7-4} = opc7_4;
+ let Inst{3-0} = Rm;
+}
+
+// PKH instructions
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
opc, asm, "", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<8> sh;
let Inst{27-20} = opcod;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = tb;
+ let Inst{5-4} = 0b01;
+ let Inst{3-0} = Rm;
}
//===----------------------------------------------------------------------===//
@@ -977,7 +1100,7 @@
Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
- let Inst{12} = opcod3;
+ let Inst{12} = opcod3;
}
// BR_JT instructions
@@ -1019,7 +1142,7 @@
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = !con(oops, (outs s_cc_out:$s));
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
@@ -1041,7 +1164,7 @@
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
@@ -1099,13 +1222,13 @@
// A6.2.4 Load/store single data item encoding.
class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
let Inst{15-12} = opA;
- let Inst{11-9} = opB;
+ let Inst{11-9} = opB;
}
-class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
+class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
-class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
// A6.2.5 Miscellaneous 16-bit instructions encoding.
class T1Misc<bits<7> opcode> : Encoding16 {
@@ -1120,14 +1243,15 @@
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
-// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1136,7 +1260,7 @@
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+ let AsmString = !strconcat(opc, "${s}${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
@@ -1185,11 +1309,11 @@
pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
- let Inst{24} = P;
- let Inst{23} = ?; // The U bit.
- let Inst{22} = 1;
- let Inst{21} = W;
- let Inst{20} = load;
+ let Inst{24} = P;
+ let Inst{23} = ?; // The U bit.
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = load;
}
class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1220,19 +1344,19 @@
: InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
- let Inst{24} = signed;
- let Inst{23} = 0;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
let Inst{22-21} = opcod;
- let Inst{20} = load;
- let Inst{11} = 1;
+ let Inst{20} = load;
+ let Inst{11} = 1;
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
- let Inst{10} = pre; // The P bit.
- let Inst{8} = 1; // The W bit.
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
}
// Helper class for disassembly only
@@ -1243,9 +1367,9 @@
: T2I<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b011;
- let Inst{23} = long;
+ let Inst{23} = long;
let Inst{22-20} = op22_20;
- let Inst{7-4} = op7_4;
+ let Inst{7-4} = op7_4;
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
@@ -1274,9 +1398,11 @@
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
+ bits<4> p;
+ let Inst{31-28} = p;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, "${p}", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasVFP2];
}
@@ -1307,7 +1433,8 @@
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1011;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
// 64-bit loads & stores operate on both NEON and VFP pipelines.
let D = VFPNeonDomain;
@@ -1321,29 +1448,43 @@
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1010;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+}
+
+// VFP Load / store multiple pseudo instructions.
+class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain,
+ cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasVFP2];
}
// Load / store multiple
-class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
- let Inst{11-8} = 0b1011;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
// 64-bit loads & stores operate on both NEON and VFP pipelines.
let D = VFPNeonDomain;
}
-class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
- let Inst{11-8} = 0b1010;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
}
// Double precision, unary
@@ -1354,7 +1495,8 @@
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{19-16} = opcod3;
- let Inst{11-8} = 0b1011;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
}
@@ -1366,9 +1508,10 @@
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Double precision, binary, VML[AS] (for additional predicate)
@@ -1378,13 +1521,13 @@
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 1; // Double precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
list<Predicate> Predicates = [HasVFP2, UseVMLx];
}
-
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1393,7 +1536,8 @@
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{19-16} = opcod3;
- let Inst{11-8} = 0b1010;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
let Inst{7-6} = opcod4;
let Inst{4} = opcod5;
}
@@ -1414,9 +1558,10 @@
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
- let Inst{11-8} = 0b1010;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-9} = 0b101;
+ let Inst{8} = 0; // Single precision
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Single precision binary, if no NEON
@@ -1496,9 +1641,7 @@
: InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(
- !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
- !strconcat("\t", asm));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
@@ -1510,7 +1653,7 @@
: InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
+ let AsmString = !strconcat(opc, "${p}", "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
@@ -1521,10 +1664,28 @@
: NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
cstr, pattern> {
let Inst{31-24} = 0b11110100;
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{7-4} = op7_4;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
+}
+
+class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
+ list<dag> pattern>
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ let Pattern = pattern;
+ list<Predicate> Predicates = [HasNEON];
}
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1548,13 +1709,23 @@
string opc, string dt, string asm, string cstr,
list<dag> pattern>
: NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-19} = op21_19;
- let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{5} = op5;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<13> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{24} = SIMM{7};
+ let Inst{18-16} = SIMM{6-4};
+ let Inst{3-0} = SIMM{3-0};
}
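The new operand bits scatter the modified immediate across the word. A sketch of the packing these let statements describe (illustrative helper, not LLVM API; only the SIMM bits shown above are placed):

  #include <cstdint>
  uint32_t scatterNEONModImm(uint32_t Inst, unsigned SIMM) {
    Inst |= ((SIMM >> 7) & 0x1) << 24;  // SIMM{7}   -> Inst{24}
    Inst |= ((SIMM >> 4) & 0x7) << 16;  // SIMM{6-4} -> Inst{18-16}
    Inst |=  (SIMM       & 0xf);        // SIMM{3-0} -> Inst{3-0}
    return Inst;
  }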
// NEON 2 vector register format.
@@ -1567,9 +1738,18 @@
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
// Same as N2V except it doesn't have a datatype suffix.
@@ -1582,9 +1762,18 @@
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
// NEON 2 vector register with immediate.
@@ -1592,12 +1781,23 @@
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vm;
+ bits<6> SIMM;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
+ let Inst{21-16} = SIMM{5-0};
}
// NEON 3 vector register format.
@@ -1605,12 +1805,24 @@
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
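Each five-bit NEON register number splits into a four-bit field plus a high bit in the D/N/M position, exactly as the let statements above lay out. A sketch (helper name assumed):

  #include <cstdint>
  uint32_t scatterN3VRegs(uint32_t Inst, unsigned Vd, unsigned Vn, unsigned Vm) {
    Inst |= ((Vd & 0xf) << 12) | (((Vd >> 4) & 1) << 22);  // Vd low bits + D
    Inst |= ((Vn & 0xf) << 16) | (((Vn >> 4) & 1) << 7);   // Vn low bits + N
    Inst |=  (Vm & 0xf)        | (((Vm >> 4) & 1) << 5);   // Vm low bits + M
    return Inst;
  }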
// Same as N3V except it doesn't have a data type suffix.
@@ -1619,30 +1831,40 @@
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+
+ // Instruction operands.
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+
+ let Inst{15-12} = Vd{3-0};
+ let Inst{22} = Vd{4};
+ let Inst{19-16} = Vn{3-0};
+ let Inst{7} = Vn{4};
+ let Inst{3-0} = Vm{3-0};
+ let Inst{5} = Vm{4};
}
// NEON VMOVs between scalar and core registers.
class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
+ : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain,
"", itin> {
let Inst{27-20} = opcod1;
- let Inst{11-8} = opcod2;
- let Inst{6-5} = opcod3;
- let Inst{4} = 1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(
- !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)),
- !strconcat("\t", asm));
+ let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
let Pattern = pattern;
list<Predicate> Predicates = [HasNEON];
}
@@ -1670,9 +1892,9 @@
let Inst{24-23} = 0b11;
let Inst{21-20} = 0b11;
let Inst{19-16} = op19_16;
- let Inst{11-7} = 0b11000;
- let Inst{6} = op6;
- let Inst{4} = 0;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
}
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.cpp Tue Oct 26 19:48:03 2010
@@ -33,7 +33,7 @@
default: break;
case ARM::LDR_PRE:
case ARM::LDR_POST:
- return ARM::LDR;
+ return ARM::LDRi12;
case ARM::LDRH_PRE:
case ARM::LDRH_POST:
return ARM::LDRH;
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.td (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrInfo.td Tue Oct 26 19:48:03 2010
@@ -44,6 +44,10 @@
SDTCisVT<3, i32>, SDTCisVT<4, i32>,
SDTCisVT<5, OtherVT>]>;
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -54,10 +58,12 @@
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
@@ -106,7 +112,7 @@
[SDNPOutFlag]>;
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
- [SDNPOutFlag,SDNPCommutative]>;
+ [SDNPOutFlag, SDNPCommutative]>;
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
@@ -118,20 +124,23 @@
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
- SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
+ SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
-def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
-def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
+
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
+def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
-def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
@@ -140,34 +149,34 @@
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
-def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
-def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
-def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
-
-def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -192,12 +201,6 @@
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24.
-def rot_imm : PatLeaf<(i32 imm), [{
- int32_t v = (int32_t)N->getZExtValue();
- return v == 8 || v == 16 || v == 24;
-}]>;
-
/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
def imm1_15 : PatLeaf<(i32 imm), [{
return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
@@ -229,15 +232,11 @@
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
+ string EncoderMethod = "getBitfieldInvertedMaskOpValue";
let PrintMethod = "printBitfieldInvMaskImmOperand";
}
/// Split a 32-bit immediate into two 16 bit parts.
-def lo16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
- MVT::i32);
-}]>;
-
def hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
}]>;
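For reference, the surviving hi16 transform just keeps the top halfword:

  #include <cstdint>
  uint32_t hi16(uint32_t V) { return V >> 16; }  // 0x12345678 -> 0x1234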
@@ -300,10 +299,25 @@
let PrintMethod = "printPCLabel";
}
+// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
+def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{
+ int32_t v = (int32_t)N->getZExtValue();
+ return v == 8 || v == 16 || v == 24; }]> {
+ string EncoderMethod = "getRotImmOpValue";
+}
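The extend classes later place rot in the two-bit Inst{11-10} field, so 8/16/24 presumably encode as value / 8. A sketch of the named encoder under that assumption:

  // Assumed behaviour of getRotImmOpValue: 8/16/24 -> 1/2/3.
  unsigned getRotImmOpValue(unsigned Rot) { return Rot >> 3; }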
+
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+}
+
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
[shl,srl,sra,rotr]> {
+ string EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
@@ -313,10 +327,8 @@
// represented in the imm field in the same 12-bit form that they are encoded
// into so_imm instructions: the 8-bit immediate is the least significant bits
// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
-def so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
- }]> {
+def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
+ string EncoderMethod = "getSOImmOpValue";
let PrintMethod = "printSOImmOperand";
}
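A worked sketch of the 12-bit form the comment describes: the 8-bit immediate sits in bits 0-7 and is rotated right by twice the 4-bit field in bits 8-11, so e.g. the encoding 0x4ff decodes to 0xff000000:

  #include <cstdint>
  uint32_t decodeSOImm(uint32_t Enc12) {
    uint32_t Imm8 = Enc12 & 0xff;
    unsigned Rot  = 2 * ((Enc12 >> 8) & 0xf);
    return Rot ? (Imm8 >> Rot) | (Imm8 << (32 - Rot)) : Imm8;
  }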
@@ -361,10 +373,36 @@
return (int32_t)N->getZExtValue() < 32;
}]>;
+/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'.
+def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{
+ return (int32_t)N->getZExtValue() < 32;
+}]> {
+ string EncoderMethod = "getImmMinusOneOpValue";
+}
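Per the comment, the encoder presumably just subtracts one:

  // Assumed behaviour of getImmMinusOneOpValue.
  unsigned getImmMinusOneOpValue(unsigned Imm) { return Imm - 1; }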
+
// Define ARM specific addressing modes.
-// addrmode2 := reg +/- reg shop imm
+
+// addrmode_imm12 := reg +/- imm12
+//
+def addrmode_imm12 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
+
+ string EncoderMethod = "getAddrModeImm12OpValue";
+ let PrintMethod = "printAddrModeImm12Operand";
+ let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+// ldst_so_reg := reg +/- reg shop imm
+//
+def ldst_so_reg : Operand<i32>,
+ ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
+ // FIXME: Simplify the printer
+ let PrintMethod = "printAddrMode2Operand";
+ let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
// addrmode2 := reg +/- imm12
+// := reg +/- reg shop imm
//
def addrmode2 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
@@ -373,7 +411,8 @@
}
def am2offset : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> {
+ ComplexPattern<i32, 2, "SelectAddrMode2Offset",
+ [], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode2OffsetOperand";
let MIOperandInfo = (ops GPR, i32imm);
}
@@ -388,7 +427,8 @@
}
def am3offset : Operand<i32>,
- ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> {
+ ComplexPattern<i32, 2, "SelectAddrMode3Offset",
+ [], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode3OffsetOperand";
let MIOperandInfo = (ops GPR, i32imm);
}
@@ -444,51 +484,93 @@
/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
/// binop that produces a value.
-multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
+multiclass AsI1_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
}
- def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
let isCommutable = Commutable;
- }
- def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
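Putting the ri fields together: bit 25 selects the immediate form, the four-bit opcode from AsI1 sits at bits 24-21, and Rd/Rn/imm12 land where the let statements above place them. A sketch with condition and S bits omitted (helper name assumed):

  #include <cstdint>
  uint32_t encodeDPri(unsigned Opcod, unsigned Rd, unsigned Rn, unsigned Imm12) {
    return (1u << 25) | (Opcod << 21) | (Rn << 16) | (Rd << 12) | Imm12;
  }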
/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
/// instruction modifies the CPSR register.
let Defs = [CPSR] in {
-multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
- def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
- let Inst{20} = 1;
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ iii, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ let Inst{20} = 1;
}
- def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
- let isCommutable = Commutable;
+ def rr : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ iir, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{11-4} = 0b00000000;
- let Inst{20} = 1;
let Inst{25} = 0;
- }
- def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
let Inst{20} = 1;
+ }
+ def rs : AI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm,
+ iis, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{20} = 1;
}
}
}
@@ -496,147 +578,223 @@
/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// an explicit result, only implicitly setting CPSR.
-let Defs = [CPSR] in {
-multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
- def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
- opc, "\t$a, $b",
- [(opnode GPR:$a, so_imm:$b)]> {
- let Inst{20} = 1;
+let isCompare = 1, Defs = [CPSR] in {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
+ def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii,
+ opc, "\t$Rn, $imm",
+ [(opnode GPR:$Rn, so_imm:$imm)]> {
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{15-12} = 0b0000;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ let Inst{20} = 1;
}
- def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
- opc, "\t$a, $b",
- [(opnode GPR:$a, GPR:$b)]> {
+ def rr : AI1<opcod, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir,
+ opc, "\t$Rn, $Rm",
+ [(opnode GPR:$Rn, GPR:$Rm)]> {
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{11-4} = 0b00000000;
- let Inst{20} = 1;
let Inst{25} = 0;
let isCommutable = Commutable;
- }
- def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
- opc, "\t$a, $b",
- [(opnode GPR:$a, so_reg:$b)]> {
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = 0b0000;
+ let Inst{19-16} = Rn;
let Inst{20} = 1;
+ }
+ def rs : AI1<opcod, (outs), (ins GPR:$Rn, so_reg:$shift), DPSoRegFrm, iis,
+ opc, "\t$Rn, $shift",
+ [(opnode GPR:$Rn, so_reg:$shift)]> {
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = 0b0000;
+ let Inst{19-16} = Rn;
+ let Inst{20} = 1;
}
}
}
-/// AI_unary_rrot - A unary operation with two forms: one whose operand is a
+/// AI_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
/// FIXME: Remove the 'r' variant. Its rot_imm is zero.
-multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
- IIC_iUNAr, opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
+multiclass AI_ext_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rm))]>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
let Inst{11-10} = 0b00;
let Inst{19-16} = 0b1111;
}
- def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
- IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode (rotr GPR:$Rm, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<2> rot;
+ let Inst{15-12} = Rd;
+ let Inst{11-10} = rot;
+ let Inst{3-0} = Rm;
let Inst{19-16} = 0b1111;
}
}
-multiclass AI_unary_rrot_np<bits<8> opcod, string opc> {
- def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
- IIC_iUNAr, opc, "\t$dst, $src",
+multiclass AI_ext_rrot_np<bits<8> opcod, string opc> {
+ def r : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iEXTr, opc, "\t$Rd, $Rm",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
let Inst{11-10} = 0b00;
let Inst{19-16} = 0b1111;
}
- def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
- IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
+ def r_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rm, rot_imm:$rot),
+ IIC_iEXTr, opc, "\t$Rd, $Rm, ror $rot",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
+ bits<2> rot;
+ let Inst{11-10} = rot;
let Inst{19-16} = 0b1111;
}
}
-/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
+/// AI_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
- def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
- IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+multiclass AI_exta_rrot<bits<8> opcod, string opc, PatFrag opnode> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV6]> {
let Inst{11-10} = 0b00;
}
- def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
- i32imm:$rot),
- IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
- [(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]>,
- Requires<[IsARM, HasV6]>;
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
+ [(set GPR:$Rd, (opnode GPR:$Rn,
+ (rotr GPR:$Rm, rot_imm:$rot)))]>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+ }
}
// For disassembly only.
-multiclass AI_bin_rrot_np<bits<8> opcod, string opc> {
- def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
- IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
+multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
+ def rr : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
let Inst{11-10} = 0b00;
}
- def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
- i32imm:$rot),
- IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
+ def rr_rot : AExtI<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ rot_imm:$rot),
+ IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm, ror $rot",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]>;
+ Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<2> rot;
+ let Inst{19-16} = Rn;
+ let Inst{11-10} = rot;
+ }
}
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+ def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
let Inst{25} = 1;
- }
- def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+ }
+ def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, opc, "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM]> {
- let isCommutable = Commutable;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
- }
- def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ }
+ def rs : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
// Carry setting variants
let Defs = [CPSR] in {
multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
- def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
- [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+ def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
let Inst{20} = 1;
let Inst{25} = 1;
}
- def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
- [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+ def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{11-4} = 0b00000000;
+ let isCommutable = Commutable;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
let Inst{20} = 1;
let Inst{25} = 0;
}
- def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
- [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+ def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"),
+ [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>,
Requires<[IsARM]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -644,6 +802,34 @@
}
}
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+multiclass AI_ldr1<bit opc22, string opc, InstrItinClass iii,
+ InstrItinClass iir, PatFrag opnode> {
+ // Note: We use the complex addrmode_imm12 rather than just an input
+ // GPR and a constrained immediate so that we can use this to match
+ // frame index references and avoid matching constant pool references.
+ def i12 : AIldr1<0b010, opc22, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr",
+ [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> {
+ bits<4> Rt;
+ bits<17> addr;
+    let Inst{23} = addr{12};  // U (1 = add, 0 = subtract)
+ let Inst{19-16} = addr{16-13}; // Rn
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+ }
+ def rs : AIldr1<0b011, opc22, (outs GPR:$Rt), (ins ldst_so_reg:$shift),
+ AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift",
+ [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> {
+ bits<4> Rt;
+ bits<17> shift;
+    let Inst{23} = shift{12};  // U (1 = add, 0 = subtract)
+ let Inst{19-16} = shift{16-13}; // Rn
+ let Inst{11-0} = shift{11-0};
+ }
+}
+}
+
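For reference, the 17-bit addrmode_imm12 operand that AI_ldr1 consumes decomposes exactly as the `let Inst` lines above read it: imm12 in the low twelve bits, the U (add) flag at bit 12, and Rn in bits 16-13. A small C++ sketch of that unpacking (struct and function names are illustrative):

#include <cassert>
#include <cstdint>

// Fields of the 17-bit addrmode_imm12 operand, mirroring the
// `let Inst{...} = addr{...}` assignments above.
struct AddrModeImm12 {
  unsigned Rn;     // base register, addr{16-13}
  bool     Add;    // U bit, addr{12} (1 = add offset)
  unsigned Imm12;  // offset, addr{11-0}
};

AddrModeImm12 decodeAddrModeImm12(uint32_t Addr) {
  assert(Addr < (1u << 17) && "operand is 17 bits wide");
  AddrModeImm12 AM;
  AM.Imm12 = Addr & 0xFFF;
  AM.Add   = (Addr >> 12) & 1;
  AM.Rn    = (Addr >> 13) & 0xF;
  return AM;
}
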
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -659,21 +845,18 @@
let neverHasSideEffects = 1, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
- i32imm:$size), NoItinerary,
- "${instid:label} ${cpidx:cpentry}", []>;
+ i32imm:$size), NoItinerary, "", []>;
// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
// from removing one half of the matched pairs. That breaks PEI, which assumes
// these will always be in pairs, and asserts if it finds otherwise. Better way?
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKUP :
-PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
- "${:comment} ADJCALLSTACKUP $amt1",
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, "",
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
- "${:comment} ADJCALLSTACKDOWN $amt",
+PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, "",
[(ARMcallseq_start timm:$amt)]>;
}
@@ -681,6 +864,7 @@
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000000;
}
@@ -688,6 +872,7 @@
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000001;
}
@@ -695,6 +880,7 @@
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000010;
}
@@ -702,6 +888,7 @@
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000011;
}
@@ -709,14 +896,22 @@
"\t$dst, $a, $b",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
let Inst{27-20} = 0b01101000;
let Inst{7-4} = 0b1011;
+ let Inst{11-8} = 0b1111;
}
def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-16} = 0b001100100000;
+ let Inst{15-8} = 0b11110000;
let Inst{7-0} = 0b00000100;
}
@@ -725,6 +920,9 @@
def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM]> {
+ bits<16> val;
+ let Inst{3-0} = val{3-0};
+ let Inst{19-8} = val{15-4};
let Inst{27-20} = 0b00010010;
let Inst{7-4} = 0b0111;
}
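
BKPT's 16-bit immediate straddles the fixed 0b0111 at Inst{7-4}, so it is split as val{15-4} -> Inst{19-8} and val{3-0} -> Inst{3-0}. A C++ sketch of the whole word, assuming the condition field is AL (0b1110) as for any unpredicated ARM instruction:

#include <cstdint>

// Assemble an ARM BKPT #imm16 word from the fields above: cond = AL,
// Inst{27-20} = 0b00010010, Inst{7-4} = 0b0111, imm split 15:4 / 3:0.
uint32_t encodeBKPT(uint16_t Imm) {
  uint32_t Inst = 0xEu << 28;          // cond = AL (assumed; unpredicated)
  Inst |= 0x12u << 20;                 // Inst{27-20}
  Inst |= uint32_t(Imm >> 4) << 8;     // Inst{19-8} = val{15-4}
  Inst |= 0x7u << 4;                   // Inst{7-4}
  Inst |= Imm & 0xFu;                  // Inst{3-0} = val{3-0}
  return Inst;                         // encodeBKPT(0) == 0xE1200070
}
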
@@ -735,6 +933,7 @@
// opt{5} = changemode from Inst{17}
// opt{8-6} = AIF from Inst{8-6}
// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
+// FIXME: Integrated assembler will need these split out.
def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM]> {
@@ -775,39 +974,28 @@
defm PLDW : APreLoad<1, 0, "pldw">;
defm PLI : APreLoad<0, 1, "pli">;
-def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe",
- [/* For disassembly only; pattern left blank */]>,
+def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
+ "setend\t$end",
+ [/* For disassembly only; pattern left blank */]>,
Requires<[IsARM]> {
- let Inst{31-28} = 0b1111;
- let Inst{27-20} = 0b00010000;
- let Inst{16} = 1;
- let Inst{9} = 1;
- let Inst{7-4} = 0b0000;
-}
-
-def SETENDLE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tle",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM]> {
- let Inst{31-28} = 0b1111;
- let Inst{27-20} = 0b00010000;
- let Inst{16} = 1;
- let Inst{9} = 0;
- let Inst{7-4} = 0b0000;
+ bits<1> end;
+ let Inst{31-10} = 0b1111000100000001000000;
+ let Inst{9} = end;
+ let Inst{8-0} = 0;
}
def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV7]> {
- let Inst{27-16} = 0b001100100000;
- let Inst{7-4} = 0b1111;
+ bits<4> opt;
+ let Inst{27-4} = 0b001100100000111100001111;
+ let Inst{3-0} = opt;
}
// A5.4 Permanently UNDEFINED instructions.
-// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
-// binutils
let isBarrier = 1, isTerminator = 1 in
-def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
- ".long 0xe7ffdefe ${:comment} trap", [(trap)]>,
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ "trap", [(trap)]>,
Requires<[IsARM]> {
let Inst{27-25} = 0b011;
let Inst{24-20} = 0b11111;
@@ -816,43 +1004,48 @@
}
// Address computation and loads and stores in PIC mode.
+// FIXME: These PIC insn patterns are pseudos, but derive from the normal insn
+//        classes (AXI1, et al.) and so have encoding information and such,
+// which is suboptimal. Once the rest of the code emitter (including
+// JIT) is MC-ized we should look at refactoring these into true
+// pseudos.
let isNotDuplicable = 1 in {
def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
- Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p\t$dst, pc, $a",
+ Pseudo, IIC_iALUr, "",
[(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
let AddedComplexity = 10 in {
def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr",
+ Pseudo, IIC_iLoad_r, "",
[(set GPR:$dst, (load addrmodepc:$addr))]>;
def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr",
+ Pseudo, IIC_iLoad_bh_r, "",
[(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>;
def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr",
+ Pseudo, IIC_iLoad_bh_r, "",
[(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>;
def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr",
+ Pseudo, IIC_iLoad_bh_r, "",
[(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>;
def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr",
+ Pseudo, IIC_iLoad_bh_r, "",
[(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>;
}
let AddedComplexity = 10 in {
def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p\t$src, $addr",
+ Pseudo, IIC_iStore_r, "",
[(store GPR:$src, addrmodepc:$addr)]>;
def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr",
+ Pseudo, IIC_iStore_bh_r, "",
[(truncstorei16 GPR:$src, addrmodepc:$addr)]>;
def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr",
+ Pseudo, IIC_iStore_bh_r, "",
[(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
}
} // isNotDuplicable = 1
@@ -860,6 +1053,9 @@
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
+// FIXME: These are marked as pseudos, but they're really not(?). They're just
+// the ADR instruction. Is this the right way to handle that? They need
+// encoding information regardless.
let neverHasSideEffects = 1 in {
let isReMaterializable = 1 in
def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
@@ -883,20 +1079,14 @@
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]>,
Requires<[IsARM, HasV4T]> {
- let Inst{3-0} = 0b1110;
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ let Inst{27-0} = 0b0001001011111111111100011110;
}
// ARMV4 only
- def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"mov", "\tpc, lr", [(ARMretflag)]>,
Requires<[IsARM, NoV4T]> {
- let Inst{11-0} = 0b000000001110;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
+ let Inst{27-0} = 0b0001101000001111000000001110;
}
}
@@ -906,21 +1096,18 @@
def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst",
[(brind GPR:$dst)]>,
Requires<[IsARM, HasV4T]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- let Inst{31-28} = 0b1110;
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
}
// ARMV4 only
def MOVPCRX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "mov\tpc, $dst",
[(brind GPR:$dst)]>,
Requires<[IsARM, NoV4T]> {
- let Inst{11-4} = 0b00000000;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
- let Inst{31-28} = 0b1110;
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000110100000111100000000;
+ let Inst{3-0} = dst;
}
}
@@ -930,7 +1117,7 @@
hasExtraDefRegAllocReq = 1 in
def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_Br,
+ IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
@@ -941,14 +1128,15 @@
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl\t${func:call}",
+ IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsARM, IsNotDarwin]> {
let Inst{31-28} = 0b1110;
+    // FIXME: Encoding info for $func. Needs fixup bits.
}
def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl", "\t${func:call}",
+ IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
Requires<[IsARM, IsNotDarwin]>;
@@ -957,9 +1145,9 @@
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
Requires<[IsARM, HasV5T, IsNotDarwin]> {
- let Inst{7-4} = 0b0011;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
}
// ARMv4T
@@ -968,9 +1156,9 @@
IIC_Br, "mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, HasV4T, IsNotDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110001;
+ let Inst{3-0} = func;
}
// ARMv4
@@ -978,10 +1166,9 @@
IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, NoV4T, IsNotDarwin]> {
- let Inst{11-4} = 0b00000000;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
+ bits<4> func;
+ let Inst{27-4} = 0b000110100000111100000000;
+ let Inst{3-0} = func;
}
}
@@ -992,13 +1179,14 @@
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
def BLr9 : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl\t${func:call}",
+ IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> {
let Inst{31-28} = 0b1110;
+    // FIXME: Encoding info for $func. Needs fixup bits.
}
def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops),
- IIC_Br, "bl", "\t${func:call}",
+ IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
Requires<[IsARM, IsDarwin]>;
@@ -1006,9 +1194,9 @@
def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> {
- let Inst{7-4} = 0b0011;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110011;
+ let Inst{3-0} = func;
}
// ARMv4T
@@ -1017,9 +1205,9 @@
IIC_Br, "mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, HasV4T, IsDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
+ bits<4> func;
+ let Inst{27-4} = 0b000100101111111111110001;
+ let Inst{3-0} = func;
}
// ARMv4
@@ -1027,15 +1215,16 @@
IIC_Br, "mov\tlr, pc\n\tmov\tpc, $func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, NoV4T, IsDarwin]> {
- let Inst{11-4} = 0b00000000;
- let Inst{15-12} = 0b1111;
- let Inst{19-16} = 0b0000;
- let Inst{27-20} = 0b00011010;
+ bits<4> func;
+ let Inst{27-4} = 0b000110100000111100000000;
+ let Inst{3-0} = func;
}
}
// Tail calls.
+// FIXME: These should probably be xformed into the non-TC versions of the
+// instructions as part of MC lowering.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
// Darwin versions.
let Defs = [R0, R1, R2, R3, R9, R12,
@@ -1062,10 +1251,9 @@
def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
[]>, Requires<[IsDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- let Inst{31-28} = 0b1110;
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
}
}
@@ -1094,10 +1282,9 @@
def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
[]>, Requires<[IsNotDarwin]> {
- let Inst{7-4} = 0b0001;
- let Inst{19-8} = 0b111111111111;
- let Inst{27-20} = 0b00010010;
- let Inst{31-28} = 0b1110;
+ bits<4> dst;
+ let Inst{31-4} = 0b1110000100101111111111110001;
+ let Inst{3-0} = dst;
}
}
}
@@ -1162,14 +1349,18 @@
// Secure Monitor Call is a system instruction -- for disassembly only
def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
[/* For disassembly only; pattern left blank */]> {
- let Inst{23-20} = 0b0110;
- let Inst{7-4} = 0b0111;
+ bits<4> opt;
+ let Inst{23-4} = 0b01100000000000000111;
+ let Inst{3-0} = opt;
}
// Supervisor Call (Software Interrupt) -- for disassembly only
let isCall = 1 in {
def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]> {
+ bits<24> svc;
+ let Inst{23-0} = svc;
+}
}
// Store Return State is a system instruction -- for disassembly only
@@ -1207,91 +1398,98 @@
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
- "ldr", "\t$dst, $addr",
- [(set GPR:$dst, (load addrmode2:$addr))]>;
+
+defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_i, IIC_iLoad_r,
+ UnOpFrag<(load node:$Src)>>;
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
isReMaterializable = 1 in
-def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
- "ldr", "\t$dst, $addr", []>;
+def LDRcp : AIldr1<0b010, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr),
+ AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", []> {
+ bits<4> Rt;
+ bits<17> addr;
+  let Inst{23} = addr{12};  // U (1 = add, 0 = subtract)
+ let Inst{19-16} = 0b1111;
+ let Inst{15-12} = Rt;
+ let Inst{11-0} = addr{11-0}; // imm12
+}
// Loads with zero extension
def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrh", "\t$dst, $addr",
+ IIC_iLoad_bh_r, "ldrh", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
- IIC_iLoadr, "ldrb", "\t$dst, $addr",
+ IIC_iLoad_bh_r, "ldrb", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
// Loads with sign extension
def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrsh", "\t$dst, $addr",
+ IIC_iLoad_bh_r, "ldrsh", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrsb", "\t$dst, $addr",
+ IIC_iLoad_bh_r, "ldrsb", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldrd", "\t$dst1, $addr",
+ IIC_iLoad_d_r, "ldrd", "\t$dst1, $addr",
[]>, Requires<[IsARM, HasV5TE]>;
// Indexed loads
def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
+ (ins addrmode2:$addr), LdFrm, IIC_iLoad_ru,
"ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
+ (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoad_ru,
"ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode2:$addr), LdFrm, IIC_iLoadru,
+ (ins addrmode2:$addr), LdFrm, IIC_iLoad_bh_ru,
"ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
+ (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoad_bh_ru,
"ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+                 (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
// For disassembly only
def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
- (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr,
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_d_ru,
"ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>,
Requires<[IsARM, HasV5TE]>;
// For disassembly only
def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_d_ru,
"ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
Requires<[IsARM, HasV5TE]>;
@@ -1300,94 +1498,94 @@
// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
+ (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoad_ru,
"ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
+ (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoad_bh_ru,
"ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoad_bh_ru,
"ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
// Store
-def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
+def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStore_r,
"str", "\t$src, $addr",
[(store GPR:$src, addrmode2:$addr)]>;
// Stores with truncate
def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
- IIC_iStorer, "strh", "\t$src, $addr",
+ IIC_iStore_bh_r, "strh", "\t$src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
-def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "strb", "\t$src, $addr",
+def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
+ IIC_iStore_bh_r, "strb", "\t$src, $addr",
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
- StMiscFrm, IIC_iStorer,
+ StMiscFrm, IIC_iStore_d_r,
"strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, am2offset:$offset),
- StFrm, IIC_iStoreru,
+ StFrm, IIC_iStore_ru,
"str", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STR_POST : AI2stwpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
+ StFrm, IIC_iStore_ru,
"str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+                     StMiscFrm, IIC_iStore_bh_ru,
"strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+ StMiscFrm, IIC_iStore_bh_ru,
"strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
+ StFrm, IIC_iStore_bh_ru,
"strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
+ StFrm, IIC_iStore_bh_ru,
"strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
@@ -1395,14 +1593,14 @@
// For disassembly only
def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
(ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+ StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$src1, $src2, [$base, $offset]!",
"$base = $base_wb", []>;
// For disassembly only
def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
(ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+ StMiscFrm, IIC_iStore_d_ru,
"strd", "\t$src1, $src2, [$base], $offset",
"$base = $base_wb", []>;
@@ -1410,7 +1608,7 @@
def STRT : AI2stwpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
+ StFrm, IIC_iStore_ru,
"strt", "\t$src, [$base], $offset", "$base = $base_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
@@ -1418,7 +1616,7 @@
def STRBT : AI2stbpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
- StFrm, IIC_iStoreru,
+ StFrm, IIC_iStore_bh_ru,
"strbt", "\t$src, [$base], $offset", "$base = $base_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
@@ -1426,7 +1624,7 @@
def STRHT: AI3sthpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
- StMiscFrm, IIC_iStoreru,
+ StMiscFrm, IIC_iStore_bh_ru,
"strht", "\t$src, [$base], $offset", "$base = $base_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
@@ -1439,12 +1637,12 @@
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeNone, LdStMulFrm, IIC_iLoadm,
+ IndexModeNone, LdStMulFrm, IIC_iLoad_m,
"ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iLoadm,
+ IndexModeUpd, LdStMulFrm, IIC_iLoad_mu,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
@@ -1452,12 +1650,12 @@
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IndexModeNone, LdStMulFrm, IIC_iStorem,
+ IndexModeNone, LdStMulFrm, IIC_iStore_m,
"stm${addr:submode}${p}\t$addr, $srcs", "", []>;
def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IndexModeUpd, LdStMulFrm, IIC_iStorem,
+ IndexModeUpd, LdStMulFrm, IIC_iStore_mu,
"stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>;
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
@@ -1467,50 +1665,79 @@
//
let neverHasSideEffects = 1 in
-def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
- "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
}
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
-def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm,
- IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP {
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
+ IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
}
-def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
- "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
+ "mov", "\t$Rd, $src", [(set GPR:$Rd, so_reg:$src)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> src;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = src;
let Inst{25} = 0;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
- "mov", "\t$dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP {
+def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi,
+ "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = 0b0000;
+ let Inst{11-0} = imm;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
+def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm:$imm),
DPFrm, IIC_iMOVi,
- "movw", "\t$dst, $src",
- [(set GPR:$dst, imm0_65535:$src)]>,
+ "movw", "\t$Rd, $imm",
+ [(set GPR:$Rd, imm0_65535:$imm)]>,
Requires<[IsARM, HasV6T2]>, UnaryDP {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
}
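
MOVi16 is the one immediate form here that carries a full 16 bits, which is why the high nibble spills into Inst{19-16}, the slot a data-processing instruction would normally give to Rn. A sketch of just that split (positions read off the `let Inst` lines; the helper is illustrative):

#include <cstdint>

// Place a movw-style 16-bit immediate into its two encoding fields,
// leaving Rd at Inst{15-12} untouched.
uint32_t encodeMovwImm(uint32_t Inst, uint16_t Imm) {
  Inst &= ~((0xFu << 16) | 0xFFFu);    // clear Inst{19-16} and Inst{11-0}
  Inst |= Imm & 0xFFFu;                // Inst{11-0}  = imm{11-0}
  Inst |= uint32_t(Imm >> 12) << 16;   // Inst{19-16} = imm{15-12}
  return Inst;
}
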
-let Constraints = "$src = $dst" in
-def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+let Constraints = "$src = $Rd" in
+def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm:$imm),
DPFrm, IIC_iMOVi,
- "movt", "\t$dst, $imm",
- [(set GPR:$dst,
+ "movt", "\t$Rd, $imm",
+ [(set GPR:$Rd,
(or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<16> imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm{11-0};
+ let Inst{19-16} = imm{15-12};
let Inst{20} = 0;
let Inst{25} = 1;
}
@@ -1519,20 +1746,20 @@
Requires<[IsARM, HasV6T2]>;
let Uses = [CPSR] in
-def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
- "mov", "\t$dst, $src, rrx",
- [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP;
+def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, "",
+ [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
+ Requires<[IsARM]>;
// These aren't really mov instructions, but we have to define them this way
// due to flag operands.
let Defs = [CPSR] in {
-def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
-def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "movs", "\t$dst, $src, asr #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
+def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, "",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
+def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, "",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+ Requires<[IsARM]>;
}
//===----------------------------------------------------------------------===//
@@ -1541,31 +1768,31 @@
// Sign extenders
-defm SXTB : AI_unary_rrot<0b01101010,
- "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm SXTH : AI_unary_rrot<0b01101011,
- "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm SXTB : AI_ext_rrot<0b01101010,
+ "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm SXTH : AI_ext_rrot<0b01101011,
+ "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm SXTAB : AI_bin_rrot<0b01101010,
+defm SXTAB : AI_exta_rrot<0b01101010,
"sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm SXTAH : AI_bin_rrot<0b01101011,
+defm SXTAH : AI_exta_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
// For disassembly only
-defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">;
+defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
// For disassembly only
-defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">;
+defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
// Zero extenders
let AddedComplexity = 16 in {
-defm UXTB : AI_unary_rrot<0b01101110,
- "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm UXTH : AI_unary_rrot<0b01101111,
- "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm UXTB16 : AI_unary_rrot<0b01101100,
- "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+defm UXTB : AI_ext_rrot<0b01101110,
+ "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm UXTH : AI_ext_rrot<0b01101111,
+ "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm UXTB16 : AI_ext_rrot<0b01101100,
+ "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
// The transformation should probably be done as a combiner action
@@ -1576,33 +1803,49 @@
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16r_rot GPR:$Src, 8)>;
-defm UXTAB : AI_bin_rrot<0b01101110, "uxtab",
+defm UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
+defm UXTAH : AI_exta_rrot<0b01101111, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
// For disassembly only
-defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">;
+defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
-def SBFX : I<(outs GPR:$dst),
- (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
- "sbfx", "\t$dst, $src, $lsb, $width", "", []>,
+def SBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
let Inst{27-21} = 0b0111101;
let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
}
-def UBFX : I<(outs GPR:$dst),
- (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi,
- "ubfx", "\t$dst, $src, $lsb, $width", "", []>,
+def UBFX : I<(outs GPR:$Rd),
+ (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<5> lsb;
+ bits<5> width;
let Inst{27-21} = 0b0111111;
let Inst{6-4} = 0b101;
+ let Inst{20-16} = width;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = lsb;
+ let Inst{3-0} = Rn;
}
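
The new imm0_31_m1 operand on $width hints at the usual ARM convention of encoding width-1 in the field; lsb is encoded as-is. A hedged C++ sketch of the sbfx/ubfx field packing under that width-1 assumption:

#include <cassert>
#include <cstdint>

// Pack the sbfx/ubfx operands. Assumption (from the imm0_31_m1 operand
// name): the field at Inst{20-16} holds width - 1; lsb is encoded as-is.
uint32_t encodeBitfieldExtract(uint32_t Inst, unsigned Lsb, unsigned Width) {
  assert(Lsb < 32 && Width >= 1 && Lsb + Width <= 32 && "field out of range");
  Inst &= ~((0x1Fu << 16) | (0x1Fu << 7));  // clear width and lsb fields
  Inst |= (Width - 1) << 16;                // Inst{20-16} = width - 1
  Inst |= Lsb << 7;                         // Inst{11-7}  = lsb
  return Inst;
}
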
//===----------------------------------------------------------------------===//
@@ -1610,14 +1853,18 @@
//
defm ADD : AsI1_bin_irs<0b0100, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
defm SUBS : AI1_bin_s_irs<0b0010, "subs",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsr,
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
@@ -1629,65 +1876,141 @@
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-// These don't define reg/reg forms, because they are handled above.
-def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
- let Inst{25} = 1;
+def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
}
-def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
- let Inst{25} = 0;
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr with the operands swapped.
+def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
+ IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+
+def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
// RSB with 's' bit set.
let Defs = [CPSR] in {
-def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsbs", "\t$dst, $a, $b",
- [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
- let Inst{20} = 1;
- let Inst{25} = 1;
-}
-def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsbs", "\t$dst, $a, $b",
- [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
- let Inst{20} = 1;
- let Inst{25} = 0;
+def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
+ IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
let Uses = [CPSR] in {
-def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{25} = 1;
-}
-def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SBCrr with the operands swapped.
+def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+}
+def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{25} = 0;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
// FIXME: Allow these to be predicated.
let Defs = [CPSR], Uses = [CPSR] in {
-def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
+ DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm",
+ [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{20} = 1;
- let Inst{25} = 1;
-}
-def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{20} = 1;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
+ let Inst{11-0} = imm;
+}
+def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
+ DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift",
+ [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>,
Requires<[IsARM]> {
- let Inst{20} = 1;
- let Inst{25} = 0;
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<12> shift;
+ let Inst{25} = 0;
+ let Inst{20} = 1;
+ let Inst{11-0} = shift;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = Rn;
}
}
@@ -1714,179 +2037,249 @@
// ARM Arithmetic Instruction -- for disassembly only
-// GPR:$dst = GPR:$a op GPR:$b
+// GPR:$Rd = GPR:$Rn op GPR:$Rm
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
list<dag> pattern = [/* For disassembly only; pattern left blank */]>
- : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
- opc, "\t$dst, $a, $b", pattern> {
+ : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iALUr,
+ opc, "\t$Rd, $Rn, $Rm", pattern> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{27-20} = op27_20;
- let Inst{7-4} = op7_4;
+ let Inst{11-4} = op11_4;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
}
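
The opcode parameter widens from op7_4 to op11_4 because the media add/subtract encodings fix Inst{11-8} at 0b1111 (visible in the constants below, e.g. 0b11110001 for qadd16). A C++ sketch assembling one AAI word from the class's layout, assuming an AL (0b1110) condition:

#include <cstdint>

// Assemble an AAI word: Inst{27-20} = op27_20, Inst{11-4} = op11_4,
// Rn at 19-16, Rd at 15-12, Rm at 3-0; cond assumed AL (0b1110).
uint32_t encodeAAI(uint8_t Op27_20, uint8_t Op11_4,
                   unsigned Rd, unsigned Rn, unsigned Rm) {
  uint32_t Inst = 0xEu << 28;
  Inst |= uint32_t(Op27_20) << 20;
  Inst |= (Rn & 0xFu) << 16;
  Inst |= (Rd & 0xFu) << 12;
  Inst |= uint32_t(Op11_4) << 4;
  Inst |= Rm & 0xFu;
  return Inst;  // qadd16 r0, r1, r2: encodeAAI(0x62, 0xF1, 0, 1, 2) == 0xE6210F12
}
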
// Saturating add/subtract -- for disassembly only
-def QADD : AAI<0b00010000, 0b0101, "qadd",
- [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
-def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
-def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
-def QASX : AAI<0b01100010, 0b0011, "qasx">;
-def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
-def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
-def QSAX : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB : AAI<0b00010010, 0b0101, "qsub",
- [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
-def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
-def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
-def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
-def UQADD8 : AAI<0b01100110, 0b1001, "uqadd8">;
-def UQASX : AAI<0b01100110, 0b0011, "uqasx">;
-def UQSAX : AAI<0b01100110, 0b0101, "uqsax">;
-def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">;
-def UQSUB8 : AAI<0b01100110, 0b1111, "uqsub8">;
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+ [(set GPR:$Rd, (int_arm_qadd GPR:$Rn, GPR:$Rm))]>;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+ [(set GPR:$Rd, (int_arm_qsub GPR:$Rn, GPR:$Rm))]>;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub">;
+
+def QADD16 : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16 : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b11111111, "uqsub8">;
// Signed/Unsigned add/subtract -- for disassembly only
-def SASX : AAI<0b01100001, 0b0011, "sasx">;
-def SADD16 : AAI<0b01100001, 0b0001, "sadd16">;
-def SADD8 : AAI<0b01100001, 0b1001, "sadd8">;
-def SSAX : AAI<0b01100001, 0b0101, "ssax">;
-def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">;
-def SSUB8 : AAI<0b01100001, 0b1111, "ssub8">;
-def UASX : AAI<0b01100101, 0b0011, "uasx">;
-def UADD16 : AAI<0b01100101, 0b0001, "uadd16">;
-def UADD8 : AAI<0b01100101, 0b1001, "uadd8">;
-def USAX : AAI<0b01100101, 0b0101, "usax">;
-def USUB16 : AAI<0b01100101, 0b0111, "usub16">;
-def USUB8 : AAI<0b01100101, 0b1111, "usub8">;
+def SASX : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b11111111, "usub8">;
// Signed/Unsigned halving add/subtract -- for disassembly only
-def SHASX : AAI<0b01100011, 0b0011, "shasx">;
-def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">;
-def SHADD8 : AAI<0b01100011, 0b1001, "shadd8">;
-def SHSAX : AAI<0b01100011, 0b0101, "shsax">;
-def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">;
-def SHSUB8 : AAI<0b01100011, 0b1111, "shsub8">;
-def UHASX : AAI<0b01100111, 0b0011, "uhasx">;
-def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">;
-def UHADD8 : AAI<0b01100111, 0b1001, "uhadd8">;
-def UHSAX : AAI<0b01100111, 0b0101, "uhsax">;
-def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">;
-def UHSUB8 : AAI<0b01100111, 0b1111, "uhsub8">;
+def SHASX : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">;
// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-def USAD8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
MulFrm /* for convenience */, NoItinerary, "usad8",
- "\t$dst, $a, $b", []>,
+ "\t$Rd, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{27-20} = 0b01111000;
let Inst{15-12} = 0b1111;
let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
}
-def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
MulFrm /* for convenience */, NoItinerary, "usada8",
- "\t$dst, $a, $b, $acc", []>,
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
Requires<[IsARM, HasV6]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ bits<4> Ra;
let Inst{27-20} = 0b01111000;
let Inst{7-4} = 0b0001;
+ let Inst{19-16} = Rd;
+ let Inst{15-12} = Ra;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
}
// Signed/Unsigned saturate -- for disassembly only
-def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b001;
-}
-
-def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn, shift_imm:$sh),
+              SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
+ let Inst{20-16} = sat_imm;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{3-0} = Rn;
}
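
Folding the old SSATlsl/SSATasr pair into one shift_imm operand works because the two encodings differed only at bit 6; the splits above suggest sh packs the five-bit amount in sh{7-3} and the shift type (0 = lsl, 1 = asr, matching the old fixed 0b001/0b101 at Inst{6-4}) in sh{0}. A sketch under that assumed layout:

#include <cassert>
#include <cstdint>

// Build the 8-bit shift_imm operand used by SSAT/USAT under the assumed
// layout: bits 7-3 = shift amount, bit 0 = type (0 = lsl, 1 = asr).
uint8_t makeSatShiftImm(unsigned Amount, bool IsASR) {
  assert(Amount < 32 && "five-bit shift amount");
  return uint8_t((Amount << 3) | (IsASR ? 1u : 0u));
}
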
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
- NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
+def SSAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn), SatFrm,
+ NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn",
[/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
let Inst{27-20} = 0b01101010;
- let Inst{7-4} = 0b0011;
-}
-
-def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b001;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
}
-def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn, shift_imm:$sh),
+              SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $Rn$sh",
+ [/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<5> sat_imm;
+ bits<4> Rn;
+ bits<8> sh;
let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = sh{7-3};
+ let Inst{6} = sh{0};
+ let Inst{20-16} = sat_imm;
+ let Inst{3-0} = Rn;
}
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
- NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
+def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn), SatFrm,
+                NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn",
[/* For disassembly only; pattern left blank */]> {
+ bits<4> Rd;
+ bits<4> sat_imm;
+ bits<4> Rn;
let Inst{27-20} = 0b01101110;
- let Inst{7-4} = 0b0011;
+ let Inst{11-4} = 0b11110011;
+ let Inst{15-12} = Rd;
+ let Inst{19-16} = sat_imm;
+ let Inst{3-0} = Rn;
}
-def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSATlsl imm:$pos, GPR:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USATlsl imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
defm AND : AsI1_bin_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr,
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
-def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfc", "\t$dst, $imm", "$src = $dst",
- [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
+ "bfc", "\t$Rd, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<10> imm;
let Inst{27-21} = 0b0111110;
let Inst{6-0} = 0b0011111;
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$dst, $val, $imm", "$src = $dst",
- [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
+ [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<10> imm;
let Inst{27-21} = 0b0111110;
let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+ let Inst{15-12} = Rd;
+ let Inst{11-7} = imm{4-0}; // lsb
+ let Inst{20-16} = imm{9-5}; // width
+ let Inst{3-0} = Rn;
}
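
Per the field comments, bf_inv_mask_imm is a ten-bit (width, lsb) pair: imm{4-0} = lsb, imm{9-5} = width. Recovering that pair from the AND mask BFC matches is a ctz/popcount exercise; a hedged C++ sketch (assumes the cleared bits form one contiguous run, which the operand predicate should guarantee, and that the operand packs as (width << 5) | lsb):

#include <cassert>
#include <cstdint>

// Recover (lsb, width) of the cleared field from a BFC-style mask (all
// ones except one contiguous run of zeros), then pack the assumed ten-bit
// operand layout (width << 5) | lsb.
uint32_t makeBfInvMaskImm(uint32_t Mask) {
  uint32_t Inv = ~Mask;                     // the cleared field, as ones
  assert(Inv != 0 && "mask must clear at least one bit");
  unsigned Lsb   = __builtin_ctz(Inv);      // first cleared bit
  unsigned Width = __builtin_popcount(Inv); // run length (contiguous)
  return (Width << 5) | Lsb;
}
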
-def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
- "mvn", "\t$dst, $src",
- [(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
+def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr,
+ "mvn", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
let Inst{11-4} = 0b00000000;
+ let Inst{15-12} = Rd;
+ let Inst{3-0} = Rm;
}
-def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
- IIC_iMOVsr, "mvn", "\t$dst, $src",
- [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
+def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm,
+ IIC_iMVNsr, "mvn", "\t$Rd, $shift",
+ [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP {
+ bits<4> Rd;
+ bits<12> shift;
let Inst{25} = 0;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = shift;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
- IIC_iMOVi, "mvn", "\t$dst, $imm",
- [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
- let Inst{25} = 1;
+def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm,
+ IIC_iMVNi, "mvn", "\t$Rd, $imm",
+ [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP {
+ bits<4> Rd;
+ bits<12> imm;
+ let Inst{25} = 1;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-12} = Rd;
+ let Inst{11-0} = imm;
}
def : ARMPat<(and GPR:$src, so_imm_not:$imm),
@@ -1895,247 +2288,247 @@
//===----------------------------------------------------------------------===//
// Multiply Instructions.
//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
let isCommutable = 1 in
-def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "mul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
-
-def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "mla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
+def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>;
+
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
-def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iMAC32, "mls", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
- Requires<[IsARM, HasV6T2]>;
+ Requires<[IsARM, HasV6T2]> {
+ bits<4> Rd;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = Rd;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
// Extra precision multiplies with low / high results
+
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMUL64,
- "smull", "\t$ldst, $hdst, $a, $b", []>;
-
-def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMUL64,
- "umull", "\t$ldst, $hdst, $a, $b", []>;
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
}
// Multiply + accumulate
-def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "smlal", "\t$ldst, $hdst, $a, $b", []>;
-
-def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umlal", "\t$ldst, $hdst, $a, $b", []>;
-
-def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
- (ins GPR:$a, GPR:$b), IIC_iMAC64,
- "umaal", "\t$ldst, $hdst, $a, $b", []>,
- Requires<[IsARM, HasV6]>;
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsARM, HasV6]> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ bits<4> Rm;
+ bits<4> Rn;
+ let Inst{19-16} = RdLo;
+ let Inst{15-12} = RdHi;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
} // neverHasSideEffects
// Most significant word multiply
-def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "smmul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0001;
let Inst{15-12} = 0b1111;
}
-def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, "smmulr", "\t$dst, $a, $b",
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0011; // R = 1
let Inst{15-12} = 0b1111;
}
-def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0001;
-}
-
-def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c",
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0011; // R = 1
-}
-
-def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1101;
-}
+ Requires<[IsARM, HasV6]>;
-def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
- IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c",
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+ Requires<[IsARM, HasV6]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1111; // R = 1
-}
+ Requires<[IsARM, HasV6]>;
multiclass AI_smul<string opc, PatFrag opnode> {
- def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
- }
-
- def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
- }
-
- def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
- }
-
- def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
- }
-
- def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
- }
-
- def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16)))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
- }
+ def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+ [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsARM, HasV5TE]>;
}
multiclass AI_smla<string opc, PatFrag opnode> {
- def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc,
- (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
- }
-
- def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
- }
-
- def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
- }
-
- def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
- }
-
- def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
- }
-
- def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
- }
+ def BB : AMulxyI<0b0001000, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra,
+ (opnode (sext_inreg GPR:$Rn, i16),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def BT : AMulxyI<0b0001000, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TB : AMulxyI<0b0001000, 0b01, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sext_inreg GPR:$Rm, i16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def TT : AMulxyI<0b0001000, 0b11, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+ (sra GPR:$Rm, (i32 16)))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WB : AMulxyI<0b0001001, 0b00, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
+
+ def WT : AMulxyI<0b0001001, 0b10, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+ [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+ (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsARM, HasV5TE]>;
}
defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def SMLALBB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 0;
-}
+ Requires<[IsARM, HasV5TE]>;
-def SMLALBT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 0;
- let Inst{6} = 1;
-}
+ Requires<[IsARM, HasV5TE]>;
-def SMLALTB : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 0;
-}
+ Requires<[IsARM, HasV5TE]>;
-def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm),
+ IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
[/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{5} = 1;
- let Inst{6} = 1;
-}
+ Requires<[IsARM, HasV5TE]>;
// Helper class for AI_smld -- for disassembly only
-class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
- InstrItinClass itin, string opc, string asm>
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
: AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ bits<4> Rn;
+ bits<4> Rm;
let Inst{4} = 1;
let Inst{5} = swap;
let Inst{6} = sub;
@@ -2143,21 +2536,46 @@
let Inst{21-20} = 0b00;
let Inst{22} = long;
let Inst{27-23} = 0b01110;
+ let Inst{11-8} = Rm;
+ let Inst{3-0} = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Rd;
+ let Inst{15-12} = 0b1111;
+ let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> Ra;
+ let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+ bits<4> RdLo;
+ bits<4> RdHi;
+ let Inst{19-16} = RdHi;
+ let Inst{15-12} = RdLo;
}
multiclass AI_smld<bit sub, string opc> {
- def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">;
+ def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
- def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
- NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">;
+ def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
- def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b),
- NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">;
-
- def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
- NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">;
+ def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+ def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+ !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
}
@@ -2166,16 +2584,10 @@
multiclass AI_sdml<bit sub, string opc> {
- def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> {
- let Inst{15-12} = 0b1111;
- }
-
- def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> {
- let Inst{15-12} = 0b1111;
- }
-
+ def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+ NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
}
defm SMUA : AI_sdml<0, "smua">;
@@ -2185,110 +2597,151 @@
// Misc. Arithmetic Instructions.
//
-def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", "\t$dst, $src",
- [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> {
- let Inst{7-4} = 0b0001;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def RBIT : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rbit", "\t$dst, $src",
- [(set GPR:$dst, (ARMrbit GPR:$src))]>,
- Requires<[IsARM, HasV6T2]> {
- let Inst{7-4} = 0b0011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", "\t$dst, $src",
- [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b0011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev16", "\t$dst, $src",
- [(set GPR:$dst,
- (or (and (srl GPR:$src, (i32 8)), 0xFF),
- (or (and (shl GPR:$src, (i32 8)), 0xFF00),
- (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
- (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
-
-def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "revsh", "\t$dst, $src",
- [(set GPR:$dst,
+def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "clz", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+ Requires<[IsARM, HasV6T2]>;
+
+def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev", "\t$Rd, $Rm",
+ [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+ [(set GPR:$Rd,
+ (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
+ (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
+ (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
+ (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ Requires<[IsARM, HasV6]>;
+
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+ IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+ [(set GPR:$Rd,
(sext_inreg
- (or (srl (and GPR:$src, 0xFF00), (i32 8)),
- (shl GPR:$src, (i32 8))), i16))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{7-4} = 0b1011;
- let Inst{11-8} = 0b1111;
- let Inst{19-16} = 0b1111;
-}
+ (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+ (shl GPR:$Rm, (i32 8))), i16))]>,
+ Requires<[IsARM, HasV6]>;
+
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
-def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
- 0xFFFF0000)))]>,
- Requires<[IsARM, HasV6]> {
- let Inst{6-4} = 0b001;
-}
+def lsl_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() < 32);
+}], lsl_shift_imm>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+ (and (shl GPR:$Rm, lsl_amt:$sh),
+ 0xFFFF0000)))]>,
+ Requires<[IsARM, HasV6]>;
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+ (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+ (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
-
-def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>, Requires<[IsARM, HasV6]> {
- let Inst{6-4} = 0b101;
-}
+def asr_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() <= 32);
+}], asr_shift_imm>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+ [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+ (and (sra GPR:$Rm, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]>;
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
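
The lsl_shift_imm and asr_shift_imm transforms above fold a plain shift amount into the packed shifter-operand constant that PKHBT/PKHTB expect, via ARM_AM::getSORegOpc; note also the asymmetric bounds (lsl allows amounts below 32, asr allows up to 32). A rough model of that packing, where the enum values and the amount-shifted-left-by-3 layout are assumptions for illustration (the authoritative definitions live in ARMAddressingModes.h):

  #include <cassert>
  #include <cstdio>

  // Assumed shift-opcode values and packing; see ARMAddressingModes.h for
  // the real definitions.
  enum ShiftOpc { no_shift = 0, asr, lsl, lsr, ror, rrx };

  static unsigned getSORegOpcModel(ShiftOpc ShOp, unsigned Imm) {
    assert(Imm <= 32 && "shift amount out of range");
    return ShOp | (Imm << 3); // amount in the high bits, kind in the low bits
  }

  int main() {
    // PKHBT with "lsl #16" and PKHTB with "asr #16", as in the patterns above.
    printf("lsl16=%#x asr16=%#x\n",
           getSORegOpcModel(lsl, 16), getSORegOpcModel(asr, 16));
  }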
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
defm CMP : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-//FIXME: Disable CMN, as CCodes are backwards from compare expectations
-// Compare-to-zero still works out, just not the relationals
+
+// FIXME: We have to be careful when using the CMN instruction for comparisons
+// with 0. One would expect these two pieces of code to give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short,
+// CMP computes r0 + NOT(0) + 1 (the "carry bit" parameter to AddWithCarry is
+// defined as 1 here), so the carry flag is always set when r0 >= 0. CMN
+// computes r0 + 0 + 0 (the "carry bit" parameter is defined as 0), so that
+// AddWithCarry never produces a carry.
+//
+// When x is 0 and unsigned:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0 or
+// when it's a comparison which doesn't look at the 'carry' flag).
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
//defm CMN : AI1_cmp_irs<0b1011, "cmn",
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
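
To make the carry-flag asymmetry described above concrete, here is a small self-contained model of the architectural AddWithCarry pseudo-code; the function shape and flag struct are illustrative, not LLVM APIs:

  #include <cstdint>
  #include <cstdio>

  struct Flags { bool N, Z, C, V; };

  // AddWithCarry(x, y, carry_in), following the ARM ARM pseudo-code.
  static Flags addWithCarry(uint32_t X, uint32_t Y, unsigned CarryIn) {
    uint64_t USum = (uint64_t)X + Y + CarryIn;
    int64_t SSum = (int64_t)(int32_t)X + (int32_t)Y + CarryIn;
    uint32_t Res = (uint32_t)USum;
    return { (int32_t)Res < 0, Res == 0,
             (uint64_t)Res != USum,             // carry out of bit 31
             (int64_t)(int32_t)Res != SSum };   // signed overflow
  }

  int main() {
    uint32_t R0 = 5, R1 = 0;
    Flags Cmp = addWithCarry(R0, ~R1, 1); // CMP r0, r1: r0 + NOT(r1) + 1
    Flags Cmn = addWithCarry(R0, R1, 0);  // CMN r0, r1: r0 + r1 + 0
    printf("CMP sets C=%d, CMN sets C=%d\n", Cmp.C, Cmn.C); // C=1 vs. C=0
  }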
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>;
defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>;
defm CMPz : AI1_cmp_irs<0b1010, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
defm CMNz : AI1_cmp_irs<0b1011, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
@@ -2301,15 +2754,12 @@
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
Defs = [CPSR] in {
def BCCi64 : PseudoInst<(outs),
- (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
- IIC_Br,
- "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br, "",
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
def BCCZi64 : PseudoInst<(outs),
- (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
- IIC_Br,
- "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br, "",
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
} // usesCustomInserter
@@ -2317,11 +2767,21 @@
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
+// FIXME: These should all be pseudo-instructions that get expanded to
+// the normal MOV instructions. That would fix the dependency on
+// special casing them in tblgen.
let neverHasSideEffects = 1 in {
-def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
- IIC_iCMOVr, "mov", "\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $dst">, UnaryDP {
+def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm,
+ IIC_iCMOVr, "mov", "\t$Rd, $Rm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $Rd">, UnaryDP {
+ bits<4> Rd;
+ bits<4> Rm;
+
+ let Inst{3-0} = Rm;
+ let Inst{15-12} = Rd;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
@@ -2334,6 +2794,16 @@
let Inst{25} = 0;
}
+def MOVCCi16 : AI1<0b1000, (outs GPR:$dst), (ins GPR:$false, i32imm:$src),
+ DPFrm, IIC_iMOVi,
+ "movw", "\t$dst, $src",
+ []>,
+ RegConstraint<"$false = $dst">, Requires<[IsARM, HasV6T2]>,
+ UnaryDP {
+ let Inst{20} = 0;
+ let Inst{25} = 1;
+}
+
def MOVCCi : AI1<0b1101, (outs GPR:$dst),
(ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi,
"mov", "\t$dst, $true",
@@ -2349,204 +2819,141 @@
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def Int_MemBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
// See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_SyncBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
// See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 5",
- [(ARMMemBarrierV6 GPR:$zero)]>,
+ [(ARMMemBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DMB
// FIXME: add encoding
}
-def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 4",
- [(ARMSyncBarrierV6 GPR:$zero)]>,
+ [(ARMSyncBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DSB
// FIXME: add encoding
}
}
-// Helper class for multiclass MemB -- for disassembly only
-class AMBI<string opc, string asm>
- : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV7]> {
- let Inst{31-20} = 0xf57;
-}
-
-multiclass MemB<bits<4> op7_4, string opc> {
+// Memory Barrier Operations Variants -- for disassembly only
- def st : AMBI<opc, "\tst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1110;
- }
-
- def ish : AMBI<opc, "\tish"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1011;
- }
-
- def ishst : AMBI<opc, "\tishst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1010;
- }
-
- def nsh : AMBI<opc, "\tnsh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0111;
- }
-
- def nshst : AMBI<opc, "\tnshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0110;
- }
-
- def osh : AMBI<opc, "\tosh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0011;
- }
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+}
- def oshst : AMBI<opc, "\toshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0010;
- }
+class AMBI<bits<4> op7_4, string opc>
+ : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-8} = 0xf57ff0;
+ let Inst{7-4} = op7_4;
}
// These DMB variants are for disassembly only.
-defm DMB : MemB<0b0101, "dmb">;
+def DMBvar : AMBI<0b0101, "dmb">;
// These DSB variants are for disassembly only.
-defm DSB : MemB<0b0100, "dsb">;
+def DSBvar : AMBI<0b0100, "dsb">;
// ISB has only full system option -- for disassembly only
-def ISBsy : AMBI<"isb", ""> {
- let Inst{7-4} = 0b0110;
+def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = 0b1111;
}
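
The memb_opt operand above defers the option field to the printer. A sketch of the 4-bit Inst{3-0} encodings a printer such as printMemBOption plausibly handles, using the option encodings visible in the removed MemB multiclass (the function shape itself is illustrative only):

  #include <cstdio>

  // Barrier option encodings, per the removed MemB multiclass above.
  static const char *memBOptionName(unsigned Opt) {
    switch (Opt & 0xF) {
    case 0xF: return "sy";
    case 0xE: return "st";
    case 0xB: return "ish";
    case 0xA: return "ishst";
    case 0x7: return "nsh";
    case 0x6: return "nshst";
    case 0x3: return "osh";
    case 0x2: return "oshst";
    default:  return "<reserved>";
    }
  }

  int main() {
    printf("dmb %s\n", memBOptionName(0xB)); // dmb ish
  }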
let usesCustomInserter = 1 in {
let Uses = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_SUB_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_AND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_OR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_XOR_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_NAND_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_SUB_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_AND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_OR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_XOR_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_NAND_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_SUB_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_AND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_OR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_XOR_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_LOAD_NAND_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
- "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, "",
[(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
def ATOMIC_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, "",
[(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
def ATOMIC_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, "",
[(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
def ATOMIC_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, "",
[(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
def ATOMIC_CMP_SWAP_I8 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, "",
[(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
def ATOMIC_CMP_SWAP_I16 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, "",
[(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
def ATOMIC_CMP_SWAP_I32 : PseudoInst<
- (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
- "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, "",
[(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
}
}
@@ -2648,12 +3055,7 @@
D31 ], hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ Pseudo, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
@@ -2663,12 +3065,7 @@
hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ Pseudo, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
@@ -2678,15 +3075,21 @@
Defs = [ R7, LR, SP ] in {
def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "ldr\tsp, [$src, #8]\n\t"
- "ldr\t$scratch, [$src, #4]\n\t"
- "ldr\tr7, [$src]\n\t"
- "bx\t$scratch", "",
+ Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
Requires<[IsARM, IsDarwin]>;
}
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+ PseudoInst<(outs), (ins GPR:$src), NoItinerary, "",
+ [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+ Requires<[IsDarwin]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -2694,9 +3097,11 @@
// Large immediate handling.
// Two piece so_imms.
+// FIXME: Expand this in ARMExpandPseudoInsts.
+// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
- Pseudo, IIC_iMOVi,
+ Pseudo, IIC_iMOVix2,
"mov", "\t$dst, $src",
[(set GPR:$dst, so_imm2part:$src)]>,
Requires<[IsARM, NoV6T2]>;
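
For context on the "two piece so_imm" idea: an ARM so_imm is an 8-bit value rotated right by an even amount, and MOVi2pieces materializes constants that split into two such pieces (typically a mov followed by an orr). A simplified sketch of the check; the real splitting logic in ARM_AM (isSOImmTwoPartVal and friends) differs in detail:

  #include <cstdint>
  #include <cstdio>

  // An so_imm is an 8-bit value rotated right by an even amount, so rotating
  // left by the same amount must recover a value <= 0xFF.
  static bool isSOImm(uint32_t V) {
    for (unsigned Rot = 0; Rot < 32; Rot += 2) {
      uint32_t R = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
      if (R <= 0xFF)
        return true;
    }
    return false;
  }

  int main() {
    uint32_t V = 0x00FF00FF;                 // not an so_imm by itself
    uint32_t Lo = V & 0xFF, Hi = V & ~0xFFu; // one possible split
    printf("mov+orr split %s\n",
           (isSOImm(Lo) && isSOImm(Hi)) ? "works" : "fails"); // "works"
  }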
@@ -2719,10 +3124,9 @@
// as a single unit instead of having to handle reg inputs.
// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
-def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set GPR:$dst, (i32 imm:$src))]>,
- Requires<[IsARM, HasV6T2]>;
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, "",
+ [(set GPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsARM, HasV6T2]>;
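
The asm string deleted above shows what MOVi32imm now expands to later in the backend: a movw of the low 16 bits followed by a movt of the high 16 bits. A minimal illustration of that split:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Imm = 0x12345678;
    printf("movw rd, #%#x\n", (unsigned)(Imm & 0xFFFF)); // low 16, zeroes top
    printf("movt rd, #%#x\n", (unsigned)(Imm >> 16));    // writes high 16
  }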
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrNEON.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrNEON.td Tue Oct 26 19:48:03 2010
@@ -93,6 +93,11 @@
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
@@ -124,42 +129,39 @@
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1, neverHasSideEffects = 1 in {
-// Use vldmia to load a Q register as a D register pair.
-// This is equivalent to VLDMD except that it has a Q register operand
-// instead of a pair of D registers.
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQ
- : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
- IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
-
-// Use vld1 to load a Q register as a D register pair.
-// This alternative to VLDMQ allows an alignment to be specified.
-// This is equivalent to VLD1q64 except that it has a Q register operand.
-def VLD1q
- : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
- IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
-} // mayLoad = 1, neverHasSideEffects = 1
-
-let mayStore = 1, neverHasSideEffects = 1 in {
-// Use vstmia to store a Q register as a D register pair.
-// This is equivalent to VSTMD except that it has a Q register operand
-// instead of a pair of D registers.
+ : PseudoVFPLdStM<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoad_m, "",
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQ
- : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
- IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
-
-// Use vst1 to store a Q register as a D register pair.
-// This alternative to VSTMQ allows an alignment to be specified.
-// This is equivalent to VST1q64 except that it has a Q register operand.
-def VST1q
- : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
- IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
-} // mayStore = 1, neverHasSideEffects = 1
+ : PseudoVFPLdStM<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStore_m, "",
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
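
The VLDMQ/VSTMQ pseudos are rewritten after register allocation to use the two D registers that alias the allocated Q register (Q0 = D0/D1, Q1 = D2/D3, and so on). A toy model of that mapping; the pairing is architectural, but the helper itself is illustrative and does not use LLVM's internal register enums:

  #include <cstdio>

  // Map a NEON Q register number to the D register pair that aliases it.
  static void qToDPair(unsigned Q, unsigned &DLo, unsigned &DHi) {
    DLo = Q * 2;     // low half of the Q register
    DHi = Q * 2 + 1; // high half
  }

  int main() {
    unsigned Lo, Hi;
    qToDPair(3, Lo, Hi);
    printf("q3 -> d%u, d%u\n", Lo, Hi); // q3 -> d6, d7
  }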
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
+class VLDQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), itin,
+ "$addr.addr = $wb">;
+class VLDQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
@@ -167,7 +169,7 @@
"vld1", Dt, "\\{$dst\\}, $addr", "", []>;
class VLD1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD1,
+ (ins addrmode6:$addr), IIC_VLD1x2,
"vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
def VLD1d8 : VLD1D<0b0000, "8">;
@@ -180,16 +182,21 @@
def VLD1q32 : VLD1Q<0b1000, "32">;
def VLD1q64 : VLD1Q<0b1100, "64">;
+def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
+def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
+
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1u,
"vld1", Dt, "\\{$dst\\}, $addr$offset",
"$addr.addr = $wb", []>;
class VLD1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", Dt, "${dst:dregpair}, $addr$offset",
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1x2u,
+ "vld1", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
"$addr.addr = $wb", []>;
def VLD1d8_UPD : VLD1DWB<0b0000, "8">;
@@ -202,14 +209,19 @@
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
+def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
+ (ins addrmode6:$addr), IIC_VLD1x3, "vld1", Dt,
"\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
class VLD1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1x3u, "vld1", Dt,
"\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;
def VLD1d8T : VLD1D3<0b0000, "8">;
@@ -222,15 +234,18 @@
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+
// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
+ (ins addrmode6:$addr), IIC_VLD1x4, "vld1", Dt,
"\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
class VLD1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1x4u, "vld1", Dt,
"\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
[]>;
@@ -244,6 +259,9 @@
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
@@ -252,7 +270,7 @@
class VLD2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD2,
+ (ins addrmode6:$addr), IIC_VLD2x2,
"vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
@@ -263,16 +281,24 @@
def VLD2q16 : VLD2Q<0b0100, "16">;
def VLD2q32 : VLD2Q<0b1000, "32">;
+def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+
+def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
+
// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2u,
"vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset",
"$addr.addr = $wb", []>;
class VLD2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, 0b0011, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2,
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2x2u,
"vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
"$addr.addr = $wb", []>;
@@ -284,6 +310,14 @@
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
+
// ...with double-spaced registers (for disassembly only):
def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
@@ -302,11 +336,15 @@
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>;
+def VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>;
+
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3,
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3u,
"vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
"$addr.addr = $wb", []>;
@@ -314,6 +352,10 @@
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
@@ -322,10 +364,14 @@
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -338,6 +384,10 @@
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>;
+def VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>;
+
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -350,6 +400,10 @@
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4>;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
@@ -358,10 +412,41 @@
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4>;
+
+// Classes for VLD*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst),
+ (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst),
+ (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
+class VLDQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst),
+ (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "$src = $dst">;
+class VLDQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
@@ -370,26 +455,29 @@
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
+ IIC_VLD2ln, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2", []>;
def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">;
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>;
+
// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
-def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
+def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>;
// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
"\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
@@ -397,14 +485,21 @@
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
+def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>;
+
def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
+def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>;
+
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
+ nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
@@ -412,13 +507,16 @@
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>;
+
// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
-def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
+def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>;
// ...with address register writeback:
class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -426,7 +524,7 @@
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
- IIC_VLD3, "vld3", Dt,
+ IIC_VLD3lnu, "vld3", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
[]>;
@@ -435,15 +533,22 @@
def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
+def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
+
def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
+def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
+
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
+ nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
@@ -451,13 +556,16 @@
def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>;
+
// ...with double-spaced registers:
def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
-def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
+def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>;
// ...with address register writeback:
class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -465,7 +573,7 @@
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
- IIC_VLD4, "vld4", Dt,
+ IIC_VLD4lnu, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
[]>;
@@ -474,9 +582,16 @@
def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
+def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>;
+
def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
+def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
+
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
@@ -486,13 +601,32 @@
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+ "$addr.addr = $wb">;
+
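These store pseudos mirror the load pseudos earlier in the file. The WB
variants also model address writeback: the extra GPR:$wb output plus the
"$addr.addr = $wb" constraint says the instruction produces the updated base
register, as in "vst1.8 {d0}, [r0]!". A standalone model of that contract,
with a hypothetical helper name standing in for the instruction:

#include <cstdint>
#include <cstdio>

// vst1.8 {d0}, [r0]!  -- store 8 bytes, then return the post-incremented
// base; the returned pointer plays the role of the $wb output operand.
uint8_t *vst1_writeback(uint8_t *base, const uint8_t lanes[8]) {
  for (int i = 0; i < 8; ++i)
    base[i] = lanes[i];
  return base + 8; // base advanced by the access size
}

int main() {
  uint8_t buf[16] = {0};
  const uint8_t d0[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8_t *next = vst1_writeback(buf, d0);
  std::printf("advanced by %td bytes\n", next - buf); // prints 8
}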
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
- "vst1", Dt, "\\{$src\\}, $addr", "", []>;
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src),
+ IIC_VST1, "vst1", Dt, "\\{$src\\}, $addr", "", []>;
class VST1Q<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b1010,op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST1x2,
"vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
def VST1d8 : VST1D<0b0000, "8">;
@@ -505,15 +639,21 @@
def VST1q32 : VST1Q<0b1000, "32">;
def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
+
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST,
+ (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST1u,
"vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>;
class VST1QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
- "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+ (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
+ IIC_VST1x2u, "vst1", Dt, "\\{$src1, $src2\\}, $addr$offset",
+ "$addr.addr = $wb", []>;
def VST1d8_UPD : VST1DWB<0b0000, "8">;
def VST1d16_UPD : VST1DWB<0b0100, "16">;
@@ -525,16 +665,21 @@
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
+ IIC_VST1x3, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
class VST1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
+ IIC_VST1x3u, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
"$addr.addr = $wb", []>;
def VST1d8T : VST1D3<0b0000, "8">;
@@ -547,17 +692,20 @@
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+
// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+ IIC_VST1x4, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
[]>;
class VST1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+ "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
"$addr.addr = $wb", []>;
def VST1d8Q : VST1D4<0b0000, "8">;
@@ -570,15 +718,18 @@
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+ IIC_VST2, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
class VST2Q<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST2x2, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
def VST2d8 : VST2D<0b1000, 0b0000, "8">;
@@ -589,17 +740,25 @@
def VST2q16 : VST2Q<0b0100, "16">;
def VST2q32 : VST2Q<0b1000, "32">;
+def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+
+def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
+ IIC_VST2u, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset",
"$addr.addr = $wb", []>;
class VST2QWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+ "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
"$addr.addr = $wb", []>;
def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">;
@@ -610,6 +769,14 @@
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+
// ...with double-spaced registers (for disassembly only):
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
def VST2b16 : VST2D<0b1001, 0b0100, "16">;
@@ -621,18 +788,22 @@
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST3,
"vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
def VST3d8 : VST3D<0b0100, 0b0000, "8">;
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
+def VST3d8Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST3u,
"vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
"$addr.addr = $wb", []>;
@@ -640,6 +811,10 @@
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
def VST3q16 : VST3D<0b0101, 0b0100, "16">;
@@ -648,27 +823,35 @@
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
// ...alternate versions to be allocated odd register numbers:
-def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">;
-def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
-def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST4, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
def VST4d8 : VST4D<0b0000, 0b0000, "8">;
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
+def VST4d8Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
+ DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
"vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
"$addr.addr = $wb", []>;
@@ -676,6 +859,10 @@
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
@@ -684,10 +871,38 @@
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
// ...alternate versions to be allocated odd register numbers:
-def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+ itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+ nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
@@ -696,26 +911,29 @@
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
+ IIC_VST2ln, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
"", []>;
def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">;
def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
// ...with double-spaced registers:
def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
-def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
+ DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
"\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
@@ -723,34 +941,44 @@
def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
+def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VST, "vst3", Dt,
+ nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">;
def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
// ...with double-spaced registers:
def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
-def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
// ...with address register writeback:
class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
- IIC_VST, "vst3", Dt,
+ IIC_VST3lnu, "vst3", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
@@ -758,14 +986,21 @@
def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
+def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VST, "vst4", Dt,
+ nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
"", []>;
@@ -773,20 +1008,23 @@
def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
// ...with double-spaced registers:
def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
-// ...alternate versions to be allocated odd register numbers:
-def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
-def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
// ...with address register writeback:
class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
- IIC_VST, "vst4", Dt,
+ IIC_VST4lnu, "vst4", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
@@ -794,9 +1032,16 @@
def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
+def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
@@ -879,6 +1124,15 @@
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
+ [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+
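N2VN is identical to the N2VNInt class below except that it takes a generic
SDNode (e.g. trunc) instead of an intrinsic, so ordinary DAG patterns can now
select narrowing instructions such as vmovn; the N2VL class further down is
the widening inverse used for vmovl. Lane-wise, vmovn.i16 behaves like this
standalone sketch (a model of the semantics, not backend code):

#include <cstdint>
#include <cstdio>

// v8i16 -> v8i8: each 16-bit lane is truncated to 8 bits, no saturation.
void vmovn_i16(const uint16_t q[8], uint8_t d[8]) {
  for (int i = 0; i < 8; ++i)
    d[i] = static_cast<uint8_t>(q[i]);
}

int main() {
  uint16_t q[8] = {0x1234, 0x00FF, 0xABCD, 0, 1, 2, 3, 4};
  uint8_t d[8];
  vmovn_i16(q, d);
  std::printf("%02X %02X %02X\n", unsigned(d[0]), unsigned(d[1]),
              unsigned(d[2])); // 34 FF CD
}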
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -888,14 +1142,14 @@
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
-// Long 2-register intrinsics (currently only used for VMOVL).
-class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
(ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
@@ -923,9 +1177,9 @@
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
let isCommutable = Commutable;
}
// Same as N3VD but no data type.
@@ -966,9 +1220,9 @@
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ (outs QPR:$Qd), (ins QPR:$Qn, QPR:$Qm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Qd, $Qn, $Qm", "",
+ [(set QPR:$Qd, (ResTy (OpNode (OpTy QPR:$Qn), (OpTy QPR:$Qm))))]> {
let isCommutable = Commutable;
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1009,9 +1263,9 @@
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
let isCommutable = Commutable;
}
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -1035,14 +1289,23 @@
(Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
let isCommutable = 0;
}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
+ let isCommutable = 0;
+}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
let isCommutable = Commutable;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -1069,6 +1332,15 @@
imm:$lane)))))]> {
let isCommutable = 0;
}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
+ OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
+ let isCommutable = 0;
+}
// Multiply-Add/Sub operations: single-, double- and quad-register.
class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1083,10 +1355,11 @@
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst, (Ty (OpNode DPR:$src1,
- (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
+
class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode ShOp>
@@ -1104,24 +1377,24 @@
string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
- [(set (Ty DPR:$dst),
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ [(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (MulOp DPR:$src2,
- (Ty (NEONvduplane (Ty DPR_8:$src3),
+ (Ty (MulOp DPR:$Vn,
+ (Ty (NEONvduplane (Ty DPR_8:$Vm),
imm:$lane)))))))]>;
class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst, (Ty (OpNode QPR:$src1,
- (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDNode MulOp, SDNode ShOp>
@@ -1150,6 +1423,24 @@
(ResTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))))]>;
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
+
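The Intrinsic-Op classes wrap a generic node around an intrinsic result, with
the "$src1 = $Vd" constraint tying the accumulator input to the destination:
for VABA the pattern is Vd = src1 + vabd(Vn, Vm). A lane-wise standalone
model for the unsigned byte case:

#include <cstdint>
#include <cstdio>

// vaba.u8: acc[i] += |a[i] - b[i]| for each of the 8 lanes.
void vaba_u8(uint8_t acc[8], const uint8_t a[8], const uint8_t b[8]) {
  for (int i = 0; i < 8; ++i) {
    uint8_t abd = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i]; // IntOp: vabd.u8
    acc[i] = static_cast<uint8_t>(acc[i] + abd);           // OpNode: add
  }
}

int main() {
  uint8_t acc[8] = {10, 10, 10, 10, 10, 10, 10, 10};
  uint8_t a[8] = {5, 9, 200, 0, 1, 2, 3, 4};
  uint8_t b[8] = {9, 5, 100, 0, 1, 2, 3, 4};
  vaba_u8(acc, a, b);
  std::printf("%d %d %d\n", acc[0], acc[1], acc[2]); // 14 14 110
}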
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1169,16 +1460,63 @@
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_8:$src3),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
+ (TyD DPR:$Vm)))))))]>;
+
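N3VLIntExtOp inserts an explicit extend between the intrinsic and the
accumulate, which is how VABAL is matched after this patch:
Qd = Qd + zext(vabd(Dn, Dm)) rather than a dedicated vabal intrinsic
(N3VLIntExt below is the same composition without the accumulate, for VABDL).
Same idea as the VABA sketch above, but widening into a Q-sized accumulator:

#include <cstdint>
#include <cstdio>

// vabal.u8: 16-bit accumulator lanes gain the zero-extended absolute
// differences of two 8-bit inputs.
void vabal_u8(uint16_t acc[8], const uint8_t a[8], const uint8_t b[8]) {
  for (int i = 0; i < 8; ++i) {
    uint8_t abd = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i]; // IntOp: vabd.u8
    acc[i] += abd;                              // ExtOp (zext) + OpNode (add)
  }
}

int main() {
  uint16_t acc[8] = {1000, 1000, 1000, 0, 0, 0, 0, 0};
  uint8_t a[8] = {255, 0, 7, 0, 0, 0, 0, 0};
  uint8_t b[8] = {0, 255, 9, 0, 0, 0, 0, 0};
  vabal_u8(acc, a, b);
  std::printf("%d %d %d\n", acc[0], acc[1], acc[2]); // 1255 1255 1002
}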
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst,
- (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
@@ -1217,6 +1555,61 @@
let isCommutable = Commutable;
}
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$Qd), (ins DPR:$Dn, DPR:$Dm), N3RegFrm, itin,
+ OpcodeStr, Dt, "$Qd, $Dn, $Dm", "",
+ [(set QPR:$Qd, (OpNode (TyQ (ExtOp (TyD DPR:$Dn))),
+ (TyQ (ExtOp (TyD DPR:$Dm)))))]> {
+ let isCommutable = Commutable;
+}
+
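N3VLExt expresses the long operations as a generic node over explicitly
extended operands, so that VADDL/VSUBL (and, via N3VW below, VADDW/VSUBW)
can be selected from plain add/sub plus sext/zext rather than from the
opaque int_arm_neon_vaddl*/vsubl* intrinsics, leaving the generic form
visible to the DAG combiner. Lane-wise model for vaddl.s8, as a standalone
sketch:

#include <cstdint>
#include <cstdio>

// vaddl.s8: Qd = sext(Dn) + sext(Dm); widening before the add means the
// 16-bit result lanes cannot overflow.
void vaddl_s8(int16_t q[8], const int8_t n[8], const int8_t m[8]) {
  for (int i = 0; i < 8; ++i)
    q[i] = static_cast<int16_t>(n[i]) + static_cast<int16_t>(m[i]);
}

int main() {
  int8_t n[8] = {127, -128, 1, 0, 0, 0, 0, 0};
  int8_t m[8] = {127, -128, -1, 0, 0, 0, 0, 0};
  int16_t q[8];
  vaddl_s8(q, n, m);
  std::printf("%d %d %d\n", q[0], q[1], q[2]); // 254 -256 0
}

This is exactly the rewrite applied at the VADDL/VADDW and VSUBL/VSUBW
definitions further down in this patch.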
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
+ (TyD DPR:$src2))))))]> {
+ let isCommutable = Commutable;
+}
+
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -1248,14 +1641,15 @@
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
imm:$lane)))))]>;
-// Wide 3-register intrinsics.
-class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp, bit Commutable>
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
- OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
+ (outs QPR:$Qd), (ins QPR:$Qn, DPR:$Dm), N3RegFrm, IIC_VSUBiD,
+ OpcodeStr, Dt, "$Qd, $Qn, $Dm", "",
+ [(set QPR:$Qd, (OpNode (TyQ QPR:$Qn),
+ (TyQ (ExtOp (TyD DPR:$Dm)))))]> {
let isCommutable = Commutable;
}
@@ -1283,17 +1677,17 @@
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VPALiD,
- OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
- [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VPALiQ,
- OpcodeStr, Dt, "$dst, $src2", "$src1 = $dst",
- [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
+ OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
// Shift by immediate,
// both double- and quad-register.
@@ -1488,6 +1882,23 @@
}
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -1508,14 +1919,14 @@
// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
- def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
- def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
- def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
@@ -1543,6 +1954,27 @@
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp, Commutable>;
}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp> {
+ // 64-bit vector types.
+ def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, IntOp>;
+ def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, IntOp>;
+
+ // 128-bit vector types.
+ def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, IntOp>;
+ def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, IntOp>;
+}
multiclass N3VIntSL_HS<bits<4> op11_8,
InstrItinClass itinD16, InstrItinClass itinD32,
@@ -1573,6 +2005,21 @@
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp, Commutable>;
}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v8i8 : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp>;
+ def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, IntOp>;
+}
+
// ....then also with element size of 64 bits:
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
@@ -1589,6 +2036,20 @@
OpcodeStr, !strconcat(Dt, "64"),
v2i64, v2i64, IntOp, Commutable>;
}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+ InstrItinClass itinD16, InstrItinClass itinD32,
+ InstrItinClass itinQ16, InstrItinClass itinQ32,
+ string OpcodeStr, string Dt,
+ Intrinsic IntOp>
+ : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
+ OpcodeStr, Dt, IntOp> {
+ def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp>;
+ def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp>;
+}
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
@@ -1607,6 +2068,47 @@
}
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1643,21 +2145,36 @@
v8i16, v8i8, IntOp, Commutable>;
}
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
- def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "8"),
- v8i16, v8i8, IntOp, Commutable>;
- def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "16"),
- v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "32"),
- v2i64, v2i32, IntOp, Commutable>;
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
@@ -1700,6 +2217,29 @@
mul, ShOp>;
}
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1723,6 +2263,29 @@
}
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1752,6 +2315,21 @@
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
@@ -1996,13 +2574,13 @@
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
-defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
-defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -2040,9 +2618,9 @@
"p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
"p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
-def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
+def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
-def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
+def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
@@ -2113,16 +2691,14 @@
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", int_arm_neon_vmullu, 1>;
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
- int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
- int_arm_neon_vmullu>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
@@ -2172,13 +2748,13 @@
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
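With VMULL available as the target-specific nodes NEONvmulls/NEONvmullu,
VMLAL and VMLSL no longer need their own intrinsics: N3VLMulOp matches them
as a plain add/sub of a VMULL result, as the defm lines above show. A
lane-wise standalone model for the unsigned byte case:

#include <cstdint>
#include <cstdio>

// vmull.u8: widening multiply, v8i8 x v8i8 -> v8i16 (NEONvmullu).
void vmull_u8(uint16_t q[8], const uint8_t n[8], const uint8_t m[8]) {
  for (int i = 0; i < 8; ++i)
    q[i] = static_cast<uint16_t>(n[i]) * static_cast<uint16_t>(m[i]);
}

// vmlal.u8: matched as acc + vmull rather than a dedicated intrinsic.
void vmlal_u8(uint16_t acc[8], const uint8_t n[8], const uint8_t m[8]) {
  uint16_t prod[8];
  vmull_u8(prod, n, m);
  for (int i = 0; i < 8; ++i)
    acc[i] += prod[i]; // the generic "add" OpNode
}

int main() {
  uint16_t acc[8] = {1, 1, 1, 0, 0, 0, 0, 0};
  uint8_t n[8] = {255, 2, 3, 0, 0, 0, 0, 0};
  uint8_t m[8] = {255, 2, 3, 0, 0, 0, 0, 0};
  vmlal_u8(acc, n, m);
  std::printf("%d %d %d\n", acc[0], acc[1], acc[2]); // 65026 5 10
}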
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2224,13 +2800,13 @@
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2247,13 +2823,13 @@
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
-defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
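The same intrinsic-to-generic rewrite applies to vsubl and vsubw, now expressed as a plain sub of sign- or zero-extended operands. Per lane, the signed halfword forms behave roughly like this (illustrative sketch):

#include <cstdint>
// vsubl.s16 (Q = D - D): widen both operands, then subtract.
static inline int32_t vsubls_lane(int16_t a, int16_t b) {
  return int32_t(a) - int32_t(b);
}
// vsubw.s16 (Q = Q - D): only the narrower second operand is widened.
static inline int32_t vsubws_lane(int32_t a, int16_t b) {
  return a - int32_t(b);
}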
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -2298,9 +2874,11 @@
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
// For disassembly only.
+// FIXME: This instruction's encoding may not be correct.
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
"$dst, $src, #0">;
// For disassembly only.
+// FIXME: This instruction's encoding may not be correct.
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$dst, $src, #0">;
@@ -2314,9 +2892,11 @@
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
// For disassembly only.
+// FIXME: This instruction's encoding may not be correct.
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
"$dst, $src, #0">;
// For disassembly only.
+// FIXME: This instruction's encoding may not be correct.
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$dst, $src, #0">;
@@ -2387,23 +2967,34 @@
// VMVN : Vector Bitwise NOT (Immediate)
let isReMaterializable = 1 in {
+
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$dst, $SIMM", "",
- [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
+ [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$dst, $SIMM", "",
- [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
+ [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$dst, $SIMM", "",
- [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
+ [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$dst, $SIMM", "",
- [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
+ [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
}
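The new bodies splice bits of the encoded modified-immediate operand directly into the instruction word; for the i32 variants, "let Inst{11-8} = SIMM{11-8};" is roughly equivalent to the following (sketch; names are illustrative):

#include <cstdint>
// Copy the cmode bits (11..8) of the SIMM operand into the same positions
// of the instruction encoding, leaving every other bit untouched.
static inline uint32_t spliceCmode(uint32_t inst, uint32_t simm) {
  const uint32_t mask = 0xFu << 8; // bits 11..8
  return (inst & ~mask) | (simm & mask);
}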
// VMVN : Vector Bitwise NOT
@@ -2419,45 +3010,47 @@
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
-def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR:$src3),
+def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VCNTiD,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst,
- (v2i32 (or (and DPR:$src2, DPR:$src1),
- (and DPR:$src3, (vnotd DPR:$src1)))))]>;
-def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set DPR:$Vd,
+ (v2i32 (or (and DPR:$Vn, DPR:$src1),
+ (and DPR:$Vm, (vnotd DPR:$src1)))))]>;
+def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst,
- (v4i32 (or (and QPR:$src2, QPR:$src1),
- (and QPR:$src3, (vnotq QPR:$src1)))))]>;
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
+ [(set QPR:$Vd,
+ (v4i32 (or (and QPR:$Vn, QPR:$src1),
+ (and QPR:$Vm, (vnotq QPR:$src1)))))]>;
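Renaming the operands to $Vd/$Vn/$Vm matches the register field names in the ARM ARM; the select semantics are unchanged. Per lane, vbsl computes (illustrative sketch mirroring the pattern above):

#include <cstdint>
// vbsl: where the tied mask operand has a 1 bit, take the bit from n;
// where it has a 0 bit, take the bit from m.
static inline uint32_t vbsl_lane(uint32_t mask, uint32_t n, uint32_t m) {
  return (n & mask) | (m & ~mask);
}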
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
+// FIXME: This instruction's encoding may not be correct.
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
- "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
- "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
+// FIXME: This instruction's encoding may not be correct.
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
- "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
- "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
[/* For disassembly only; pattern left blank */]>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
@@ -2469,32 +3062,32 @@
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "s", int_arm_neon_vabds, 0>;
+ "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "u", int_arm_neon_vabdu, 0>;
+ "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
- "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
- "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "u", int_arm_neon_vabau>;
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "u", int_arm_neon_vabalu>;
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.
@@ -2539,7 +3132,7 @@
"vpadd", "i32",
v2i32, v2i32, int_arm_neon_vpadd, 0>;
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
- IIC_VBIND, "vpadd", "f32",
+ IIC_VPBIND, "vpadd", "f32",
v2f32, v2f32, int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
@@ -2567,7 +3160,7 @@
"u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
"u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
"f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
@@ -2583,7 +3176,7 @@
"u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
"u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin",
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
"f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -2635,12 +3228,12 @@
// Vector Shifts.
// VSHL : Vector Shift
-defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
- "vshl", "s", int_arm_neon_vshifts, 0>;
-defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
+ "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
- "vshl", "u", int_arm_neon_vshiftu, 0>;
+ "vshl", "u", int_arm_neon_vshiftu>;
// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
N2RegVShLFrm>;
@@ -2674,12 +3267,12 @@
NEONvshrn>;
// VRSHL : Vector Rounding Shift
-defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vrshl", "s", int_arm_neon_vrshifts, 0>;
-defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
+ "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vrshl", "u", int_arm_neon_vrshiftu, 0>;
+ "vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
N2RegVShRFrm>;
@@ -2691,12 +3284,12 @@
NEONvrshrn>;
// VQSHL : Vector Saturating Shift
-defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqshl", "s", int_arm_neon_vqshifts, 0>;
-defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
+ "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqshl", "u", int_arm_neon_vqshiftu, 0>;
+ "vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls,
N2RegVShLFrm>;
@@ -2717,12 +3310,12 @@
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
-defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
+ "vqrshl", "s", int_arm_neon_vqrshifts>;
+defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
- "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;
+ "vqrshl", "u", int_arm_neon_vqrshiftu>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
@@ -2777,7 +3370,7 @@
[(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
- IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
+ IIC_VSHLiQ, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
// VNEG : Vector Negate (integer)
@@ -2842,17 +3435,17 @@
let neverHasSideEffects = 1 in {
def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
- N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+ N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>;
def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
- N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+ N3RegFrm, IIC_VMOV, "vmov", "$dst, $src", "", []>;
// Pseudo vector move instructions for QQ and QQQQ registers. This should
// be expanded after register allocation is completed.
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
- NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+ NoItinerary, "", []>;
def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
- NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+ NoItinerary, "", []>;
} // neverHasSideEffects
// VMOV : Vector Move (Immediate)
@@ -2870,20 +3463,30 @@
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
- [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
+ [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
+
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
- [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;
+ [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{9} = SIMM{9};
+}
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
- [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
+ [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
+
def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
- [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;
+ [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
+ let Inst{11-8} = SIMM{11-8};
+}
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
@@ -3070,7 +3673,7 @@
class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy>
: NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
- IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
+ IIC_VMOVQ, OpcodeStr, Dt, "$dst, $src[$lane]",
[(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
imm:$lane)))]>;
@@ -3102,19 +3705,14 @@
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
-def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0,
- (outs DPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
[(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>;
-
-def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
- (outs QPR:$dst), (ins SPR:$src),
- IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
+def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "",
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
- "vmovn", "i", int_arm_neon_vmovn>;
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
+ "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
"vqmovn", "s", int_arm_neon_vqmovns>;
@@ -3123,10 +3721,8 @@
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
"vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
- int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
- int_arm_neon_vmovlu>;
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
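With vmovn and vmovl now modeled as plain trunc/sext/zext nodes rather than intrinsics, their per-lane behavior is simply (illustrative sketch):

#include <cstdint>
// vmovn.i16: keep the low half of each lane.
static inline int8_t vmovn_lane(int16_t x) {
  return static_cast<int8_t>(static_cast<uint8_t>(x));
}
// vmovl.s8 / vmovl.u8: widen with sign or zero extension.
static inline int16_t vmovls_lane(int8_t x) { return static_cast<int16_t>(x); }
static inline uint16_t vmovlu_lane(uint8_t x) { return static_cast<uint16_t>(x); }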
// Vector Conversions.
@@ -3179,7 +3775,7 @@
[(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>;
class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst),
- (ins QPR:$src), IIC_VMOVD,
+ (ins QPR:$src), IIC_VMOVQ,
OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>;
@@ -3202,7 +3798,7 @@
[(set DPR:$dst, (Ty (NEONvrev32 (Ty DPR:$src))))]>;
class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst),
- (ins QPR:$src), IIC_VMOVD,
+ (ins QPR:$src), IIC_VMOVQ,
OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>;
@@ -3221,7 +3817,7 @@
[(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>;
class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst),
- (ins QPR:$src), IIC_VMOVD,
+ (ins QPR:$src), IIC_VMOVQ,
OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>;
@@ -3310,6 +3906,13 @@
"vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
} // hasExtraSrcRegAllocReq = 1
+def VTBL2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def VTBL3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
@@ -3335,6 +3938,16 @@
"$orig = $dst", []>;
} // hasExtraSrcRegAllocReq = 1
+def VTBX2Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+ IIC_VTBX2, "$orig = $dst", []>;
+def VTBX3Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+ : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+ IIC_VTBX4, "$orig = $dst", []>;
+
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb.td (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb.td Tue Oct 26 19:48:03 2010
@@ -126,13 +126,11 @@
// these will always be in pairs, and asserts if it finds otherwise. Better way?
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def tADJCALLSTACKUP :
-PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
- "${:comment} tADJCALLSTACKUP $amt1",
+PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary, "",
[(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;
def tADJCALLSTACKDOWN :
-PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
- "${:comment} tADJCALLSTACKDOWN $amt",
+PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, "",
[(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
}
@@ -208,8 +206,7 @@
// For both thumb1 and thumb2.
let isNotDuplicable = 1 in
-def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
- "\n$cp:\n\tadd\t$dst, pc",
+def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, "",
[(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>,
T1Special<{0,0,?,?}> {
let Inst{6-3} = 0b1111; // A8.6.6 Rm = pc
@@ -221,9 +218,13 @@
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
// ADD rd, sp, #imm8
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in {
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
"add\t$dst, $sp, $rhs", []>,
T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8
+}
// ADD sp, sp, #imm7
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
@@ -251,19 +252,6 @@
let Inst{2-0} = 0b101;
}
-// Pseudo instruction that will expand into a tSUBspi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
-
-def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "${:comment} add\t$dst, $rhs", []>;
-
-let Defs = [CPSR] in
-def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "${:comment} and\t$dst, $rhs", []>;
-} // usesCustomInserter
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
@@ -291,7 +279,8 @@
// FIXME: remove when we have a way to mark an MI with these properties.
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
-def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
+def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
+ IIC_iPop_Br,
"pop${p}\t$dsts", []>,
T1Misc<{1,1,0,?,?,?,?}>;
@@ -303,14 +292,14 @@
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "bl\t${func:call}",
+ "bl\t$func",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsNotDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "blx\t${func:call}",
+ "blx\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>;
@@ -338,14 +327,14 @@
// Also used for Thumb2
def tBLr9 : TIx2<0b11110, 0b11, 1,
(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "bl\t${func:call}",
+ "bl\t$func",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
(outs), (ins i32imm:$func, variable_ops), IIC_Br,
- "blx\t${func:call}",
+ "blx\t$func",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>;
@@ -374,7 +363,7 @@
// Far jump
let Defs = [LR] in
def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
- "bl\t$target\t${:comment} far jump",[]>;
+ "bl\t$target",[]>;
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
@@ -419,11 +408,9 @@
// A8.6.16 B: Encoding T1
// If Inst{11-8} == 0b1110 then UNDEFINED
-// FIXME: Temporary emitted as raw bytes until this pseudo-op will be added to
-// binutils
let isBarrier = 1, isTerminator = 1 in
def tTRAP : TI<(outs), (ins), IIC_Br,
- ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 {
+ "trap", [(trap)]>, Encoding16 {
let Inst{15-12} = 0b1101;
let Inst{11-8} = 0b1110;
}
@@ -433,47 +420,47 @@
//
let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoad_r,
"ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>,
T1LdSt<0b100>;
-def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoad_r,
"ldr", "\t$dst, $addr",
[]>,
T1LdSt4Imm<{1,?,?}>;
-def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
+def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoad_bh_r,
"ldrb", "\t$dst, $addr",
[(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>,
T1LdSt<0b110>;
-def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr,
+def tLDRBi: T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoad_bh_r,
"ldrb", "\t$dst, $addr",
[]>,
T1LdSt1Imm<{1,?,?}>;
-def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
+def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoad_bh_r,
"ldrh", "\t$dst, $addr",
[(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>,
T1LdSt<0b101>;
-def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr,
+def tLDRHi: T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoad_bh_r,
"ldrh", "\t$dst, $addr",
[]>,
T1LdSt2Imm<{1,?,?}>;
let AddedComplexity = 10 in
-def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoad_bh_r,
"ldrsb", "\t$dst, $addr",
[(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>,
T1LdSt<0b011>;
let AddedComplexity = 10 in
-def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr,
+def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoad_bh_r,
"ldrsh", "\t$dst, $addr",
[(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>,
T1LdSt<0b111>;
let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
+def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
"ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_sp:$addr))]>,
T1LdStSP<{1,?,?}>;
@@ -481,14 +468,14 @@
// Special instruction for restore. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
-def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
+def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
+def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoad_i,
"ldr", ".n\t$dst, $addr",
[(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>,
T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
@@ -496,38 +483,38 @@
// Special LDR for loads from non-pc-relative constpools.
let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
isReMaterializable = 1 in
-def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
+def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoad_i,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
-def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
+def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStore_r,
"str", "\t$src, $addr",
[(store tGPR:$src, t_addrmode_s4:$addr)]>,
T1LdSt<0b000>;
-def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer,
+def tSTRi: T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStore_r,
"str", "\t$src, $addr",
[]>,
T1LdSt4Imm<{0,?,?}>;
-def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
+def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStore_bh_r,
"strb", "\t$src, $addr",
[(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>,
T1LdSt<0b010>;
-def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer,
+def tSTRBi: T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStore_bh_r,
"strb", "\t$src, $addr",
[]>,
T1LdSt1Imm<{0,?,?}>;
-def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
+def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStore_bh_r,
"strh", "\t$src, $addr",
[(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>,
T1LdSt<0b001>;
-def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer,
+def tSTRHi: T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStore_bh_r,
"strh", "\t$src, $addr",
[]>,
T1LdSt2Imm<{0,?,?}>;
-def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
+def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
"str", "\t$src, $addr",
[(store tGPR:$src, t_addrmode_sp:$addr)]>,
T1LdStSP<{0,?,?}>;
@@ -535,7 +522,7 @@
let mayStore = 1, neverHasSideEffects = 1 in {
// Special instruction for spill. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
-def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
+def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
"str", "\t$src, $addr", []>,
T1LdStSP<{0,?,?}>;
}
@@ -544,17 +531,17 @@
// Load / store multiple Instructions.
//
-// These requires base address to be written back or one of the loaded regs.
+// These require the base address to be written back or to be one of the
+// loaded regs.
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def tLDM : T1I<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
- IIC_iLoadm,
+ IIC_iLoad_m,
"ldm${addr:submode}${p}\t$addr, $dsts", []>,
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
def tLDM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
- IIC_iLoadm,
+ IIC_iLoad_m,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
@@ -563,18 +550,20 @@
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def tSTM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
- IIC_iStorem,
+ IIC_iStore_mu,
"stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
-def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops),
+ IIC_iPop,
"pop${p}\t$dsts", []>,
T1Misc<{1,1,0,?,?,?,?}>;
let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
-def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br,
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops),
+ IIC_iStore_m,
"push${p}\t$srcs", []>,
T1Misc<{0,1,0,?,?,?,?}>;
@@ -614,7 +603,7 @@
// And register
let isCommutable = 1 in
-def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iBITr,
"and", "\t$dst, $rhs",
[(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>,
T1DataProcessing<0b0000>;
@@ -632,13 +621,13 @@
T1DataProcessing<0b0100>;
// BIC register
-def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iBITr,
"bic", "\t$dst, $rhs",
[(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>,
T1DataProcessing<0b1110>;
// CMN register
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
// Compare-to-zero still works out, just not the relationals
//def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
@@ -652,7 +641,7 @@
}
// CMP immediate
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi,
"cmp", "\t$lhs, $rhs",
[(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>,
@@ -664,7 +653,7 @@
}
// CMP register
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
"cmp", "\t$lhs, $rhs",
[(ARMcmp tGPR:$lhs, tGPR:$rhs)]>,
@@ -685,7 +674,7 @@
// XOR register
let isCommutable = 1 in
-def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iBITr,
"eor", "\t$dst, $rhs",
[(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>,
T1DataProcessing<0b0001>;
@@ -754,14 +743,14 @@
T1DataProcessing<0b1101>;
// move inverse register
-def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr,
+def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMVNr,
"mvn", "\t$dst, $src",
[(set tGPR:$dst, (not tGPR:$src))]>,
T1DataProcessing<0b1111>;
// bitwise or register
let isCommutable = 1 in
-def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr,
+def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iBITr,
"orr", "\t$dst, $rhs",
[(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>,
T1DataProcessing<0b1100>;
@@ -845,8 +834,8 @@
T1Misc<{0,0,1,0,0,0,?}>;
// test
-let isCommutable = 1, Defs = [CPSR] in
-def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr,
+let isCompare = 1, isCommutable = 1, Defs = [CPSR] in
+def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iTSTr,
"tst", "\t$lhs, $rhs",
[(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>,
T1DataProcessing<0b1000>;
@@ -871,7 +860,7 @@
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- NoItinerary, "${:comment} tMOVCCr $cc",
+ NoItinerary, "",
[/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
@@ -928,14 +917,7 @@
[ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1,
isBarrier = 1 in {
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary,
- "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
- "adds\t$val, #7\n\t"
- "str\t$val, [$src, #4]\n\t"
- "movs\tr0, #0\n\t"
- "b\t1f\n\t"
- "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
- "1:", "",
+ AddrModeNone, SizeSpecial, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
}
@@ -944,12 +926,7 @@
Defs = [ R7, LR, SP ] in {
def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, SizeSpecial, IndexModeNone,
- Pseudo, NoItinerary,
- "ldr\t$scratch, [$src, #8]\n\t"
- "mov\tsp, $scratch\n\t"
- "ldr\t$scratch, [$src, #4]\n\t"
- "ldr\tr7, [$src]\n\t"
- "bx\t$scratch", "",
+ Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
Requires<[IsThumb, IsDarwin]>;
}
@@ -1037,8 +1014,7 @@
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary,
- "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary, "",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb1Only]>;
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb2.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb2.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb2.td (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrThumb2.td Tue Oct 26 19:48:03 2010
@@ -51,10 +51,7 @@
// represented in the imm field in the same 12-bit form that they are encoded
// into t2_so_imm instructions: the 8-bit immediate is the least significant
// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
-def t2_so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
-}]>;
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>;
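Pred_t2_so_imm performs the same check the inline predicate used to do via ARM_AM::getT2SOImmVal. As a reminder of what a Thumb-2 modified immediate accepts, a standalone model (sketch only; the authoritative logic lives in ARMAddressingModes.h):

#include <cstdint>
// An 8-bit chunk replicated across the word in one of four patterns, or
// (with its top bit set) rotated right by 8..31 bits.
static bool isT2SOImm(uint32_t v) {
  uint32_t lo = v & 0xff, b8 = (v >> 8) & 0xff;
  if ((v >> 8) == 0) return true;                 // 0x000000ab
  if (v == (lo | (lo << 16))) return true;        // 0x00ab00ab
  if (v == ((b8 << 8) | (b8 << 24))) return true; // 0xab00ab00
  if (v == lo * 0x01010101u) return true;         // 0xabababab
  for (unsigned rot = 8; rot < 32; ++rot) {
    uint32_t c = (v << rot) | (v >> (32 - rot)); // undo the rotate-right
    if ((c >> 8) == 0 && (c & 0x80)) return true;
  }
  return false;
}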
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
@@ -131,7 +128,7 @@
// t2addrmode_imm12 := reg + imm12
def t2addrmode_imm12 : Operand<i32>,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
- let PrintMethod = "printT2AddrModeImm12Operand";
+ let PrintMethod = "printAddrModeImm12Operand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
@@ -143,13 +140,13 @@
}
def t2am_imm8_offset : Operand<i32>,
- ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", []>{
+ ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
+ [], [SDNPWantRoot]> {
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
}
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
-def t2addrmode_imm8s4 : Operand<i32>,
- ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> {
+def t2addrmode_imm8s4 : Operand<i32> {
let PrintMethod = "printT2AddrModeImm8s4Operand";
let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
}
@@ -173,10 +170,11 @@
/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// unary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
-multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Cheap = 0, bit ReMat = 0> {
+multiclass T2I_un_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
// shifted imm
- def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+ def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), iii,
opc, "\t$dst, $src",
[(set rGPR:$dst, (opnode t2_so_imm:$src))]> {
let isAsCheapAsAMove = Cheap;
@@ -189,7 +187,7 @@
let Inst{15} = 0;
}
// register
- def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr,
+ def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), iir,
opc, ".w\t$dst, $src",
[(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11101;
@@ -202,7 +200,7 @@
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), iis,
opc, ".w\t$dst, $src",
[(set rGPR:$dst, (opnode t2_so_reg:$src))]> {
let Inst{31-27} = 0b11101;
@@ -216,10 +214,11 @@
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
-multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0, string wide =""> {
+multiclass T2I_bin_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0, string wide = ""> {
// shifted imm
- def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), iii,
opc, "\t$dst, $lhs, $rhs",
[(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
@@ -229,7 +228,7 @@
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), iir,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
[(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
@@ -242,7 +241,7 @@
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), iis,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
[(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
@@ -254,14 +253,15 @@
/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
// the ".w" prefix to indicate that they are wide.
-multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
+multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">;
/// T2I_rbin_irs - Same as T2I_bin_irs except the order of operands is
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_bin_irs counterpart.
-multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
opc, ".w\t$dst, $rhs, $lhs",
@@ -272,8 +272,20 @@
let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
+ // register
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
+ opc, "\t$dst, $rhs, $lhs",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = ?; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsir,
opc, "\t$dst, $rhs, $lhs",
[(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
@@ -286,10 +298,11 @@
/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
/// instruction modifies the CPSR register.
let Defs = [CPSR] in {
-multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
+multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode, bit Commutable = 0> {
// shifted imm
- def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), iii,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
[(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
@@ -299,7 +312,7 @@
let Inst{15} = 0;
}
// register
- def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), iir,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
[(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
@@ -312,7 +325,7 @@
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), iis,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
[(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
@@ -328,6 +341,9 @@
multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, ".w\t$dst, $lhs, $rhs",
[(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
@@ -338,6 +354,7 @@
let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
}
+ }
// 12-bit imm
def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
!strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
@@ -461,7 +478,8 @@
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except it sets the 's' bit; the
+/// register version is not needed since this is only for codegen.
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
@@ -513,10 +531,12 @@
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
-let Defs = [CPSR] in {
-multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+let isCompare = 1, Defs = [CPSR] in {
+multiclass T2I_cmp_irs<bits<4> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
+ PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
+ def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), iii,
opc, ".w\t$lhs, $rhs",
[(opnode GPR:$lhs, t2_so_imm:$rhs)]> {
let Inst{31-27} = 0b11110;
@@ -527,7 +547,7 @@
let Inst{11-8} = 0b1111; // Rd
}
// register
- def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
+ def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), iir,
opc, ".w\t$lhs, $rhs",
[(opnode GPR:$lhs, rGPR:$rhs)]> {
let Inst{31-27} = 0b11101;
@@ -540,7 +560,7 @@
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi,
+ def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), iis,
opc, ".w\t$lhs, $rhs",
[(opnode GPR:$lhs, t2_so_reg:$rhs)]> {
let Inst{31-27} = 0b11101;
@@ -553,8 +573,9 @@
}
/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
-multiclass T2I_ld<bit signed, bits<2> opcod, string opc, PatFrag opnode> {
- def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi,
+multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, PatFrag opnode> {
+ def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), iii,
opc, ".w\t$dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]> {
let Inst{31-27} = 0b11111;
@@ -564,7 +585,7 @@
let Inst{22-21} = opcod;
let Inst{20} = 1; // load
}
- def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi,
+ def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), iii,
opc, "\t$dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]> {
let Inst{31-27} = 0b11111;
@@ -578,7 +599,7 @@
let Inst{10} = 1; // The P bit.
let Inst{8} = 0; // The W bit.
}
- def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr,
+ def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), iir,
opc, ".w\t$dst, $addr",
[(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]> {
let Inst{31-27} = 0b11111;
@@ -589,7 +610,7 @@
let Inst{20} = 1; // load
let Inst{11-6} = 0b000000;
}
- def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
+ def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), iii,
opc, ".w\t$dst, $addr",
[(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> {
let isReMaterializable = 1;
@@ -604,8 +625,9 @@
}
/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
-multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
- def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei,
+multiclass T2I_st<bits<2> opcod, string opc,
+ InstrItinClass iii, InstrItinClass iir, PatFrag opnode> {
+ def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), iii,
opc, ".w\t$src, $addr",
[(opnode GPR:$src, t2addrmode_imm12:$addr)]> {
let Inst{31-27} = 0b11111;
@@ -613,7 +635,7 @@
let Inst{22-21} = opcod;
let Inst{20} = 0; // !load
}
- def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei,
+ def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), iii,
opc, "\t$src, $addr",
[(opnode GPR:$src, t2addrmode_imm8:$addr)]> {
let Inst{31-27} = 0b11111;
@@ -625,7 +647,7 @@
let Inst{10} = 1; // The P bit.
let Inst{8} = 0; // The W bit.
}
- def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer,
+ def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), iir,
opc, ".w\t$src, $addr",
[(opnode GPR:$src, t2addrmode_so_reg:$addr)]> {
let Inst{31-27} = 0b11111;
@@ -636,10 +658,10 @@
}
}
-/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
+/// T2I_ext_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+multiclass T2I_ext_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iEXTr,
opc, ".w\t$dst, $src",
[(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11111;
@@ -650,7 +672,7 @@
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iEXTr,
opc, ".w\t$dst, $src, ror $rot",
[(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
@@ -664,8 +686,8 @@
}
// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
-multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iEXTr,
opc, "\t$dst, $src",
[(set rGPR:$dst, (opnode rGPR:$src))]>,
Requires<[HasT2ExtractPack]> {
@@ -677,7 +699,7 @@
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iEXTr,
opc, "\t$dst, $src, ror $rot",
[(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
@@ -693,8 +715,8 @@
// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
// supported yet.
-multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iEXTr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -704,7 +726,7 @@
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iEXTr,
opc, "\t$dst, $src, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -716,10 +738,10 @@
}
}
-/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
+/// T2I_exta_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
-multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
+multiclass T2I_exta_rrot<bits<3> opcod, string opc, PatFrag opnode> {
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iEXTAr,
opc, "\t$dst, $LHS, $RHS",
[(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>,
Requires<[HasT2ExtractPack]> {
@@ -731,7 +753,7 @@
let Inst{5-4} = 0b00; // rotate
}
def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
- IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
+ IIC_iEXTAsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
[(set rGPR:$dst, (opnode rGPR:$LHS,
(rotr rGPR:$RHS, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
@@ -746,8 +768,8 @@
// DO variant - disassembly only, no pattern
-multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
+multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iEXTAr,
opc, "\t$dst, $LHS, $RHS", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -757,7 +779,7 @@
let Inst{5-4} = 0b00; // rotate
}
def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
- IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
+ IIC_iEXTAsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -780,7 +802,7 @@
let neverHasSideEffects = 1 in {
let isReMaterializable = 1 in
def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #$label", []> {
+ "adr${p}.w\t$dst, #$label", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -792,7 +814,7 @@
} // neverHasSideEffects
def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #${label}_${id}", []> {
+ "adr${p}.w\t$dst, #${label}_${id}", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -888,43 +910,34 @@
let Inst{7-4} = 0b1111;
}
-// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
-// FIXME: Now that we have rGPR, do we need these pseudos? It seems
-// that the coalescer will now properly know how to do the right
-// thing without them.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
-def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
-def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
-} // usesCustomInserter
-
-
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
// Load
let canFoldAsLoad = 1, isReMaterializable = 1 in
-defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>;
+defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_r,
+ UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
-defm t2LDRH : T2I_ld<0, 0b01, "ldrh", UnOpFrag<(zextloadi16 node:$Src)>>;
-defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>;
+defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+ UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+ UnOpFrag<(zextloadi8 node:$Src)>>;
// Loads with sign extension
-defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
-defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
+defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+ UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_r,
+ UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
- IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
+ IIC_iLoad_d_i, "ldrd", "\t$dst1, $addr", []>;
def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
- (ins i32imm:$addr), IIC_iLoadi,
+ (ins i32imm:$addr), IIC_iLoad_d_i,
"ldrd", "\t$dst1, $addr", []> {
let Inst{19-16} = 0b1111; // Rn
}
@@ -979,57 +992,57 @@
let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_iu,
"ldr", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDR_POST : T2Iidxldst<0, 0b10, 1, 0, (outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_iu,
"ldr", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRB_PRE : T2Iidxldst<0, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrb", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRB_POST : T2Iidxldst<0, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrb", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRH_PRE : T2Iidxldst<0, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrh", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRH_POST : T2Iidxldst<0, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrh", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRSB_PRE : T2Iidxldst<1, 0b00, 1, 1, (outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRSB_POST : T2Iidxldst<1, 0b00, 1, 0, (outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
def t2LDRSH_PRE : T2Iidxldst<1, 0b01, 1, 1, (outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
- AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePre, IIC_iLoad_bh_iu,
"ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb",
[]>;
def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
+ AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu,
"ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
} // mayLoad = 1, neverHasSideEffects = 1
@@ -1037,8 +1050,8 @@
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
// for disassembly only.
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
-class T2IldT<bit signed, bits<2> type, string opc>
- : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
+class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), ii, opc,
"\t$dst, $addr", []> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -1050,62 +1063,65 @@
let Inst{10-8} = 0b110; // PUW.
}
-def t2LDRT : T2IldT<0, 0b10, "ldrt">;
-def t2LDRBT : T2IldT<0, 0b00, "ldrbt">;
-def t2LDRHT : T2IldT<0, 0b01, "ldrht">;
-def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">;
-def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">;
+def t2LDRT : T2IldT<0, 0b10, "ldrt", IIC_iLoad_i>;
+def t2LDRBT : T2IldT<0, 0b00, "ldrbt", IIC_iLoad_bh_i>;
+def t2LDRHT : T2IldT<0, 0b01, "ldrht", IIC_iLoad_bh_i>;
+def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt", IIC_iLoad_bh_i>;
+def t2LDRSHT : T2IldT<1, 0b01, "ldrsht", IIC_iLoad_bh_i>;
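As an aside, the PUW field that the T2IldT class above hard-wires to 0b110 is just three packed addressing-mode bits. Below is a minimal C++ sketch of that packing, assuming the usual ARM ARM reading (P = indexed, U = add, W = writeback); the helper name is hypothetical, not from this tree:

#include <cassert>

// Hypothetical helper, not from this tree: pack the P/U/W addressing-mode
// bits as Inst{10-8} holds them. PUW = 0b110 means plain offset addressing
// with a positive offset and no writeback, which is what the *T loads
// above encode.
static unsigned encodePUW(bool P, bool U, bool W) {
  return (unsigned(P) << 2) | (unsigned(U) << 1) | unsigned(W);
}

int main() {
  assert(encodePUW(true, true, false) == 6); // 0b110
  return 0;
}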
// Store
-defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+defm t2STR :T2I_st<0b10,"str", IIC_iStore_i, IIC_iStore_r,
+ BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb", IIC_iStore_bh_i, IIC_iStore_bh_r,
+ BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_r,
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
- IIC_iStorer, "strd", "\t$src1, $addr", []>;
+ IIC_iStore_d_r, "strd", "\t$src1, $addr", []>;
// Indexed stores
def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
+ AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
+ AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
"strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePre, IIC_iStoreiu,
+ AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu,
"strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
(ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
- AddrModeT2_i8, IndexModePost, IIC_iStoreiu,
+ AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu,
"strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
@@ -1113,8 +1129,8 @@
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
-class T2IstT<bits<2> type, string opc>
- : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc,
+class T2IstT<bits<2> type, string opc, InstrItinClass ii>
+ : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), ii, opc,
"\t$src, $addr", []> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
@@ -1126,28 +1142,28 @@
let Inst{10-8} = 0b110; // PUW
}
-def t2STRT : T2IstT<0b10, "strt">;
-def t2STRBT : T2IstT<0b00, "strbt">;
-def t2STRHT : T2IstT<0b01, "strht">;
+def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
+def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
+def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;
// ldrd / strd pre / post variants
// For disassembly only.
def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
"ldrd", "\t$dst1, $dst2, [$base, $imm]!", []>;
def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$dst1, GPR:$dst2),
- (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
+ (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru,
"ldrd", "\t$dst1, $dst2, [$base], $imm", []>;
def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs),
(ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
- NoItinerary, "strd", "\t$src1, $src2, [$base, $imm]!", []>;
+ IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base, $imm]!", []>;
def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
(ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
- NoItinerary, "strd", "\t$src1, $src2, [$base], $imm", []>;
+ IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base], $imm", []>;
// T2Ipl (Preload Data/Instruction) signals the memory system about possible
// future data/instruction accesses. These are for disassembly only.
@@ -1156,7 +1172,7 @@
// The neg_zero operand translates -0 to -1, -1 to -2, and so on.
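A hedged sketch of that translation (the helper below is illustrative, not code from this patch): every non-positive printed value maps to the encoding one below it.

#include <cassert>

// Illustrative only: the neg_zero mapping shifts each non-positive value
// down by one so that -0 gets a distinct encoding.
static int negZeroEncode(int Val) {
  assert(Val <= 0 && "neg_zero covers non-positive values only");
  return Val - 1; // -0 -> -1, -1 -> -2, ...
}

int main() {
  assert(negZeroEncode(0) == -1 && negZeroEncode(-1) == -2);
  return 0;
}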
multiclass T2Ipl<bit instr, bit write, string opc> {
- def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc,
+ def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoad_i, opc,
"\t[$base, $imm]", []> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
@@ -1167,7 +1183,7 @@
let Inst{15-12} = 0b1111;
}
- def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
+ def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc,
"\t[$base, $imm]", []> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
@@ -1179,7 +1195,7 @@
let Inst{11-8} = 0b1100;
}
- def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
+ def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoad_i, opc,
"\t[pc, $imm]", []> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
@@ -1191,7 +1207,7 @@
let Inst{15-12} = 0b1111;
}
- def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc,
+ def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoad_i, opc,
"\t[$base, $a]", []> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
@@ -1204,7 +1220,7 @@
let Inst{5-4} = 0b00; // no shift is applied
}
- def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc,
+ def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoad_i, opc,
"\t[$base, $a, lsl $shamt]", []> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
@@ -1227,7 +1243,7 @@
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_iLoadm,
+ reglist:$dsts, variable_ops), IIC_iLoad_m,
"ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
@@ -1238,7 +1254,8 @@
}
def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_iLoadm,
+ reglist:$dsts, variable_ops),
+ IIC_iLoad_mu,
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
@@ -1252,7 +1269,7 @@
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
- reglist:$srcs, variable_ops), IIC_iStorem,
+ reglist:$srcs, variable_ops), IIC_iStore_m,
"stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
@@ -1264,7 +1281,7 @@
def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IIC_iStorem,
+ IIC_iStore_m,
"stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
@@ -1336,28 +1353,28 @@
// Sign extenders
-defm t2SXTB : T2I_unary_rrot<0b100, "sxtb",
+defm t2SXTB : T2I_ext_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
-defm t2SXTH : T2I_unary_rrot<0b000, "sxth",
+defm t2SXTH : T2I_ext_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">;
+defm t2SXTB16 : T2I_ext_rrot_sxtb16<0b010, "sxtb16">;
-defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
+defm t2SXTAB : T2I_exta_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
-defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah",
+defm t2SXTAH : T2I_exta_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">;
+defm t2SXTAB16 : T2I_exta_rrot_DO<0b010, "sxtab16">;
// TODO: SXT(A){B|H}16 - done for disassembly only
// Zero extenders
let AddedComplexity = 16 in {
-defm t2UXTB : T2I_unary_rrot<0b101, "uxtb",
+defm t2UXTB : T2I_ext_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
-defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
+defm t2UXTH : T2I_ext_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
+defm t2UXTB16 : T2I_ext_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
@@ -1369,11 +1386,11 @@
def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
(t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
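These patterns lean on the fact that, under the 0x00FF00FF mask, a rotate-right by 8 and a logical shift-right by 8 agree. A quick stand-alone C++ check of that identity (a sanity sketch, not compiler code):

#include <cassert>
#include <cstdint>

static uint32_t ror32(uint32_t X, unsigned N) {
  return (X >> N) | (X << (32 - N)); // N must be 1..31
}

int main() {
  // The bits where ror and lsr differ (the top byte rotated back in) are
  // exactly the bits the 0x00FF00FF mask discards.
  for (uint64_t X = 0; X <= 0xFFFFFFFFull; X += 0x10001) {
    uint32_t V = static_cast<uint32_t>(X);
    assert((ror32(V, 8) & 0x00FF00FFu) == ((V >> 8) & 0x00FF00FFu));
  }
  return 0;
}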
-defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
+defm t2UXTAB : T2I_exta_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
-defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah",
+defm t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
-defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">;
+defm t2UXTAB16 : T2I_exta_rrot_DO<0b011, "uxtab16">;
}
//===----------------------------------------------------------------------===//
@@ -1387,8 +1404,10 @@
// ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
+ IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
@@ -1401,7 +1420,7 @@
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
-defm t2RSB : T2I_rbin_is <0b1110, "rsb",
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
@@ -1524,24 +1543,13 @@
// Signed/Unsigned saturate -- for disassembly only
-def t2SSATlsl:T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos,rGPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2SSATasr:T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos,rGPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
@@ -1556,24 +1564,13 @@
let Inst{7-6} = 0b00; // imm2 = '00'
}
-def t2USATlsl:T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos,rGPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2USATasr:T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos,rGPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
@@ -1588,8 +1585,8 @@
let Inst{7-6} = 0b00; // imm2 = '00'
}
-def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSATlsl imm:$pos, GPR:$a, 0)>;
-def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USATlsl imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
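For reference, the saturation these definitions describe is an ordinary clamp to an n-bit range. A scalar C++ sketch, taken from the architectural semantics rather than from anything in this patch:

#include <algorithm>
#include <cstdint>

// Scalar models of n-bit signed/unsigned saturation as ssat/usat perform it.
static int32_t ssatVal(int32_t X, unsigned N) { // 1 <= N <= 32
  int32_t Hi = static_cast<int32_t>((1u << (N - 1)) - 1);
  int32_t Lo = -Hi - 1;
  return std::min(std::max(X, Lo), Hi);
}

static uint32_t usatVal(int32_t X, unsigned N) { // 0 <= N <= 31
  int32_t Hi = static_cast<int32_t>((1u << N) - 1);
  return static_cast<uint32_t>(std::min(std::max(X, 0), Hi));
}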
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
@@ -1601,7 +1598,7 @@
defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
let Uses = [CPSR] in {
-def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
+def t2RRX : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"rrx", "\t$dst, $src",
[(set rGPR:$dst, (ARMrrx rGPR:$src))]> {
let Inst{31-27} = 0b11101;
@@ -1648,13 +1645,17 @@
//
defm t2AND : T2I_bin_w_irs<0b0000, "and",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
let Constraints = "$src = $dst" in
@@ -1669,7 +1670,7 @@
}
def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
+ IIC_iUNAsi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10100;
@@ -1677,7 +1678,7 @@
}
def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
+ IIC_iUNAsi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b11100;
@@ -1688,7 +1689,7 @@
let Constraints = "$src = $dst" in
def t2BFI : T2I<(outs rGPR:$dst),
(ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
- IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+ IIC_iBITi, "bfi", "\t$dst, $val, $imm",
[(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
@@ -1697,12 +1698,15 @@
let Inst{15} = 0;
}
-defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS,
- (not node:$RHS))>>;
+defm t2ORN : T2I_bin_irs<0b0011, "orn",
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi,
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
// Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version
let AddedComplexity = 1 in
-defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
+defm t2MVN : T2I_un_irs <0b0011, "mvn",
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
+ UnOpFrag<(not node:$Src)>, 1, 1>;
let AddedComplexity = 1 in
@@ -1754,7 +1758,8 @@
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst), (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
+def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"smull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1762,7 +1767,8 @@
let Inst{7-4} = 0b0000;
}
-def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst), (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
+def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"umull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1772,7 +1778,8 @@
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
+def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"smlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1780,7 +1787,8 @@
let Inst{7-4} = 0b0000;
}
-def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
+def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1788,7 +1796,8 @@
let Inst{7-4} = 0b0000;
}
-def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst), (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
+def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umaal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1829,7 +1838,7 @@
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLAR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
+def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlar", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1838,7 +1847,7 @@
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLS : T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
+def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmls", "\t$dst, $a, $b, $c",
[(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
@@ -1848,7 +1857,7 @@
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLSR : T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
+def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlsr", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1858,7 +1867,7 @@
}
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "bb"), "\t$dst, $a, $b",
[(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
(sext_inreg rGPR:$b, i16)))]> {
@@ -1870,7 +1879,7 @@
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "bt"), "\t$dst, $a, $b",
[(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
(sra rGPR:$b, (i32 16))))]> {
@@ -1882,7 +1891,7 @@
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "tb"), "\t$dst, $a, $b",
[(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
(sext_inreg rGPR:$b, i16)))]> {
@@ -1894,7 +1903,7 @@
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "tt"), "\t$dst, $a, $b",
[(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
(sra rGPR:$b, (i32 16))))]> {
@@ -1949,7 +1958,7 @@
def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
[(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16),
- (sra rGPR:$b, (i32 16)))))]> {
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1961,7 +1970,7 @@
def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
[(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
- (sext_inreg rGPR:$b, i16))))]> {
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1973,7 +1982,7 @@
def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
[(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
- (sra rGPR:$b, (i32 16)))))]> {
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1985,7 +1994,7 @@
def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
[(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
- (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
+ (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1997,7 +2006,7 @@
def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
[(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
- (sra rGPR:$b, (i32 16))), (i32 16))))]> {
+ (sra rGPR:$b, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2012,35 +2021,35 @@
// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst),
- (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
// These are for disassembly only.
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
- IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
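Since these are disassembly-only here, no dag patterns spell out their semantics. A scalar sketch of what the dual-halfword forms compute, assumed from the instruction-set manual:

#include <cstdint>

// Scalar models: multiply the bottom and top 16-bit lanes of each operand,
// then add (smuad) or subtract (smusd) the two products.
static int32_t smuad(uint32_t A, uint32_t B) {
  return static_cast<int16_t>(A) * static_cast<int16_t>(B) +
         static_cast<int16_t>(A >> 16) * static_cast<int16_t>(B >> 16);
}

static int32_t smusd(uint32_t A, uint32_t B) {
  return static_cast<int16_t>(A) * static_cast<int16_t>(B) -
         static_cast<int16_t>(A >> 16) * static_cast<int16_t>(B >> 16);
}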
def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst),
@@ -2091,7 +2100,7 @@
[(set rGPR:$dst, (ARMrbit rGPR:$src))]>;
def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
- "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
+ "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rev16", ".w\t$dst, $src",
@@ -2099,7 +2108,7 @@
(or (and (srl rGPR:$src, (i32 8)), 0xFF),
(or (and (shl rGPR:$src, (i32 8)), 0xFF00),
(or (and (srl rGPR:$src, (i32 8)), 0xFF0000),
- (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
+ (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"revsh", ".w\t$dst, $src",
@@ -2108,10 +2117,10 @@
(or (srl (and rGPR:$src, 0xFF00), (i32 8)),
(shl rGPR:$src, (i32 8))), i16))]>;
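The two dag trees above are the usual scalar byte-swap identities. As a sketch:

#include <cstdint>

// Scalar forms of the patterns above: rev16 swaps the bytes within each
// halfword; revsh byte-swaps the low halfword and sign-extends the result.
static uint32_t rev16(uint32_t X) {
  return ((X >> 8) & 0x00FF00FFu) | ((X << 8) & 0xFF00FF00u);
}

static int32_t revsh(uint32_t X) {
  return static_cast<int16_t>(((X & 0xFFu) << 8) | ((X >> 8) & 0xFFu));
}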
-def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
+def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iBITsi, "pkhbt", "\t$dst, $src1, $src2$sh",
[(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF),
- (and (shl rGPR:$src2, (i32 imm:$shamt)),
+ (and (shl rGPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
@@ -2125,15 +2134,17 @@
def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
(t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$shamt)),
- (t2PKHBT rGPR:$src1, rGPR:$src2, imm16_31:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iBITsi, "pkhtb", "\t$dst, $src1, $src2$sh",
[(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000),
- (and (sra rGPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>,
+ (and (sra rGPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2144,21 +2155,22 @@
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, (i32 16))),
- (t2PKHTB rGPR:$src1, rGPR:$src2, 16)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
- (and (srl rGPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (t2PKHTB rGPR:$src1, rGPR:$src2, imm1_15:$shamt)>,
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
Requires<[HasT2ExtractPack]>;
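Written as plain code, the pack operations these patterns target look like the sketch below (hand-written for illustration, and assuming an arithmetic right shift for signed values). It also shows why an asr amount of 0 is reserved: that slot is pkhbt.

#include <cstdint>

// Illustrative scalar models of the PKH pack instructions targeted above.
static uint32_t pkhbt(uint32_t A, uint32_t B, unsigned Lsl) { // Lsl: 0..31
  return (A & 0x0000FFFFu) | ((B << Lsl) & 0xFFFF0000u);
}

static uint32_t pkhtb(uint32_t A, int32_t B, unsigned Asr) {  // Asr: 1..32
  // asr #32 keeps only sign bits, which an asr by 31 also produces.
  unsigned Sh = (Asr == 32) ? 31 : Asr;
  return (A & 0xFFFF0000u) | (static_cast<uint32_t>(B >> Sh) & 0x0000FFFFu);
}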
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
-
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
defm t2CMPz : T2I_cmp_irs<0b1101, "cmp",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
//FIXME: Disable CMN, as CCodes are backwards from compare expectations
@@ -2166,6 +2178,7 @@
//defm t2CMN : T2I_cmp_irs<0b1000, "cmn",
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
defm t2CMNz : T2I_cmp_irs<0b1000, "cmn",
+ IIC_iCMPi, IIC_iCMPr, IIC_iCMPsi,
BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
//def : T2Pat<(ARMcmp GPR:$src, t2_so_imm_neg:$imm),
@@ -2175,15 +2188,12 @@
(t2CMNzri GPR:$src, t2_so_imm_neg:$imm)>;
defm t2TST : T2I_cmp_irs<0b0000, "tst",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>;
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
+ IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi,
BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
-// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero.
-// Short range conditional branch. Looks awesome for loops. Need to figure
-// out how to use this one.
-
-
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
@@ -2213,6 +2223,17 @@
let Inst{15} = 0;
}
+def t2MOVCCi16 : T2I<(outs rGPR:$dst), (ins rGPR:$false, i32imm:$src),
+ IIC_iMOVi,
+ "movw", "\t$dst, $src", []>,
+ RegConstraint<"$false = $dst"> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25} = 1;
+ let Inst{24-21} = 0b0010;
+ let Inst{20} = 0; // The S bit.
+ let Inst{15} = 0;
+}
+
class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
@@ -2247,21 +2268,15 @@
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F5;
// FIXME: add support for options other than a full system DMB
let Inst{3-0} = 0b1111;
}
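On the FIXME just above: the nibble held at 0b1111 is the barrier option field, i.e. the full-system "sy" option. A hedged sketch of what a configurable encoder might look like (hypothetical helper, not part of this patch):

#include <cstdint>

// Hypothetical: combine the fixed T2 DMB bits (Inst{31-4} = 0xF3BF8F5)
// with a selectable option nibble; 0xF is the full-system "sy" option the
// definition above hard-codes.
static uint32_t encodeT2DMB(unsigned Option = 0xF) {
  return 0xF3BF8F50u | (Option & 0xFu);
}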
-def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F4;
// FIXME: add support for options other than a full system DSB
let Inst{3-0} = 0b1111;
@@ -2438,32 +2453,18 @@
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
D31 ], hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary,
- "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
- "adds\t$val, #7\n\t"
- "str\t$val, [$src, #4]\n\t"
- "movs\tr0, #0\n\t"
- "b\t1f\n\t"
- "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
- "1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, SizeSpecial, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary,
- "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
- "adds\t$val, #7\n\t"
- "str\t$val, [$src, #4]\n\t"
- "movs\tr0, #0\n\t"
- "b\t1f\n\t"
- "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
- "1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
+ AddrModeNone, SizeSpecial, NoItinerary, "", "",
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
}
@@ -2479,7 +2480,8 @@
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
- reglist:$dsts, variable_ops), IIC_Br,
+ reglist:$dsts, variable_ops),
+ IIC_iLoad_mBr,
"ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
@@ -2712,12 +2714,12 @@
(t2_so_neg_imm2part_2 imm:$RHS))>;
// 32-bit immediate using movw + movt.
-// This is a single pseudo instruction to make it re-materializable. Remove
-// when we can do generalized remat.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
-def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set rGPR:$dst, (i32 imm:$src))]>;
+def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+ "", [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb, HasV6T2]>;
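When this pseudo is eventually expanded, the 32-bit immediate splits into the two halfwords a movw/movt pair consumes; a trivial sketch of that split:

#include <cstdint>

// The lo16/hi16 split behind a movw/movt expansion of a 32-bit immediate.
static void splitMovwMovt(uint32_t Imm, uint16_t &Lo16, uint16_t &Hi16) {
  Lo16 = static_cast<uint16_t>(Imm & 0xFFFFu); // movw takes the low half
  Hi16 = static_cast<uint16_t>(Imm >> 16);     // movt takes the high half
}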
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -2734,8 +2736,7 @@
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary,
- "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ IIC_iLoadiALU, "",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrVFP.td?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMInstrVFP.td Tue Oct 26 19:48:03 2010
@@ -1,4 +1,4 @@
-//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===//
+//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,30 +11,26 @@
//
//===----------------------------------------------------------------------===//
-def SDT_FTOI :
-SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
-def SDT_ITOF :
-SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
-def SDT_CMPFP0 :
-SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-def SDT_VMOVDRR :
-SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
- SDTCisSameAs<1, 2>]>;
-
-def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
-def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
-def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
-def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
-def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
-def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
-def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
-def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+def SDT_FTOI : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
+def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
+def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>]>;
+
+def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>;
+def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>;
+def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>;
+def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
+def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag, SDNPOutFlag]>;
+def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
+def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutFlag]>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
-
def vfp_f32imm : Operand<f32>,
PatLeaf<(f32 fpimm), [{
return ARM::getVFPf32Imm(N->getValueAPF()) != -1;
@@ -55,9 +51,9 @@
//
let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
- [(set DPR:$dst, (f64 (load addrmode5:$addr)))]>;
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
IIC_fpLoad32, "vldr", ".32\t$dst, $addr",
@@ -77,121 +73,227 @@
//
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
- variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
+ variable_ops), IndexModeNone, IIC_fpLoad_m,
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
- variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
+ variable_ops), IndexModeNone, IIC_fpLoad_m,
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ IndexModeUpd, IIC_fpLoad_mu,
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
-def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
- IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ IndexModeUpd, IIC_fpLoad_mu,
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
- variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
+ variable_ops), IndexModeNone, IIC_fpStore_m,
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
- variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
+ variable_ops), IndexModeNone, IIC_fpStore_m,
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ IndexModeUpd, IIC_fpStore_mu,
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
-def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
- IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ IndexModeUpd, IIC_fpStore_mu,
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
-//===----------------------------------------------------------------------===//
-// FP Binary Operations.
-//
-
-def VADDD : ADbI<0b11100, 0b11, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fadd DPR:$a, (f64 DPR:$b)))]>;
-
-def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
-
-// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
-def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "vcmpe", ".f64\t$a, $b",
- [(arm_cmpfp DPR:$a, (f64 DPR:$b))]>;
-
-def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "vcmp", ".f64\t$a, $b",
- [/* For disassembly only; pattern left blank */]>;
-
-def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "vcmpe", ".f32\t$a, $b",
- [(arm_cmpfp SPR:$a, SPR:$b)]>;
-def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "vcmp", ".f32\t$a, $b",
- [/* For disassembly only; pattern left blank */]>;
+// FIXME: Can these be placed into the base class?
+class ADbI_Encode<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ADbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class ADuI_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<2> opcod4, bit opcod5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ADuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc,
+ asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class ASbI_Encode<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class ASbIn_Encode<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASbIn<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sn;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class ASuI_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<2> opcod4, bit opcod5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASuI<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc,
+ asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class ASuIn_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<2> opcod4, bit opcod5, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : ASuIn<opcod1, opcod2, opcod3, opcod4, opcod5, oops, iops, itin, opc,
+ asm, pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
}
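All of the *_Encode classes above express the same two register-number packings. In plain code, the split the let-assignments perform is (field math only, sketched for clarity):

// S-register numbers put bits 4-1 in the 4-bit field and bit 0 in the
// extra bit; D-register numbers split the other way around, matching the
// let Inst{...} assignments above.
static void splitSReg(unsigned Sm, unsigned &Field4, unsigned &ExtraBit) {
  Field4   = (Sm >> 1) & 0xF; // e.g. Inst{3-0} = Sm{4-1}
  ExtraBit = Sm & 0x1;        // e.g. Inst{5}   = Sm{0}
}

static void splitDReg(unsigned Dm, unsigned &Field4, unsigned &ExtraBit) {
  Field4   = Dm & 0xF;        // e.g. Inst{3-0} = Dm{3-0}
  ExtraBit = (Dm >> 4) & 0x1; // e.g. Inst{5}   = Dm{4}
}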
-def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fdiv DPR:$a, (f64 DPR:$b)))]>;
-
-def VDIVS : ASbI<0b11101, 0b00, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
-
-def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fmul DPR:$a, (f64 DPR:$b)))]>;
-
-def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
-
-def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fneg (fmul DPR:$a, (f64 DPR:$b))))]>;
+//===----------------------------------------------------------------------===//
+// FP Binary Operations.
+//
-def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]>;
+def VADDD : ADbI_Encode<0b11100, 0b11, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vadd", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VADDS : ASbIn_Encode<0b11100, 0b11, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]>;
+
+def VSUBD : ADbI_Encode<0b11100, 0b11, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpALU64, "vsub", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VSUBS : ASbIn_Encode<0b11100, 0b11, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]>;
+
+def VDIVD : ADbI_Encode<0b11101, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpDIV64, "vdiv", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fdiv DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VDIVS : ASbI_Encode<0b11101, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>;
+
+def VMULD : ADbI_Encode<0b11100, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fmul DPR:$Dn, (f64 DPR:$Dm)))]>;
+
+def VMULS : ASbIn_Encode<0b11100, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]>;
+
+def VNMULD : ADbI_Encode<0b11100, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fneg (fmul DPR:$Dn, (f64 DPR:$Dm))))]>;
+
+def VNMULS : ASbI_Encode<0b11100, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]>;
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)),
@@ -199,53 +301,108 @@
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
-
-def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fsub DPR:$a, (f64 DPR:$b)))]>;
-
-def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]>;
+// These are encoded as unary instructions.
+let Defs = [FPSCR] in {
+def VCMPED : ADuI_Encode<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
+ [(arm_cmpfp DPR:$Dd, (f64 DPR:$Dm))]>;
+
+def VCMPES : ASuI_Encode<0b11101, 0b11, 0b0100, 0b11, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
+ [(arm_cmpfp SPR:$Sd, SPR:$Sm)]>;
+
+// FIXME: Verify encoding after the integrated assembler is working.
+def VCMPD : ADuI_Encode<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins DPR:$Dd, DPR:$Dm),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, $Dm",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VCMPS : ASuI_Encode<0b11101, 0b11, 0b0100, 0b01, 0,
+ (outs), (ins SPR:$Sd, SPR:$Sm),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
+ [/* For disassembly only; pattern left blank */]>;
+}
//===----------------------------------------------------------------------===//
// FP Unary Operations.
//
-def VABSD : ADuI<0b11101, 0b11, 0b0000, 0b11, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "vabs", ".f64\t$dst, $a",
- [(set DPR:$dst, (fabs (f64 DPR:$a)))]>;
-
-def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,(outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "vabs", ".f32\t$dst, $a",
- [(set SPR:$dst, (fabs SPR:$a))]>;
+def VABSD : ADuI_Encode<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vabs", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fabs (f64 DPR:$Dm)))]>;
+
+def VABSS : ASuIn_Encode<0b11101, 0b11, 0b0000, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fabs SPR:$Sm))]>;
let Defs = [FPSCR] in {
-def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$a),
- IIC_fpCMP64, "vcmpe", ".f64\t$a, #0",
- [(arm_cmpfp0 (f64 DPR:$a))]>;
-
-def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$a),
- IIC_fpCMP64, "vcmp", ".f64\t$a, #0",
- [/* For disassembly only; pattern left blank */]>;
-
-def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins SPR:$a),
- IIC_fpCMP32, "vcmpe", ".f32\t$a, #0",
- [(arm_cmpfp0 SPR:$a)]>;
-
-def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins SPR:$a),
- IIC_fpCMP32, "vcmp", ".f32\t$a, #0",
- [/* For disassembly only; pattern left blank */]>;
-}
-
-def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a",
- [(set DPR:$dst, (fextend SPR:$a))]>;
+def VCMPEZD : ADuI_Encode<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
+ [(arm_cmpfp0 (f64 DPR:$Dd))]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPEZS : ASuI_Encode<0b11101, 0b11, 0b0101, 0b11, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmpe", ".f32\t$Sd, #0",
+ [(arm_cmpfp0 SPR:$Sd)]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+// FIXME: Verify encoding after the integrated assembler is working.
+def VCMPZD : ADuI_Encode<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins DPR:$Dd),
+ IIC_fpCMP64, "vcmp", ".f64\t$Dd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+
+def VCMPZS : ASuI_Encode<0b11101, 0b11, 0b0101, 0b01, 0,
+ (outs), (ins SPR:$Sd),
+ IIC_fpCMP32, "vcmp", ".f32\t$Sd, #0",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{3-0} = 0b0000;
+ let Inst{5} = 0;
+}
+}
+
+def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
+ [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
// Special case encoding: bits 11-8 are 0b1011.
-def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
- IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a",
- [(set SPR:$dst, (fround DPR:$a))]> {
+def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+
let Inst{27-23} = 0b11101;
let Inst{21-16} = 0b110111;
let Inst{11-8} = 0b1011;
@@ -255,6 +412,7 @@
// Between half-precision and single-precision. For disassembly only.
+// FIXME: Verify encoding after the integrated assembler is working.
def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
@@ -277,47 +435,90 @@
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
-let neverHasSideEffects = 1 in {
-def VMOVD: ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>;
+def VNEGD : ADuI_Encode<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fneg (f64 DPR:$Dm)))]>;
+
+def VNEGS : ASuIn_Encode<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fneg SPR:$Sm))]>;
+
+def VSQRTD : ADuI_Encode<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$Dd, $Dm",
+ [(set DPR:$Dd, (fsqrt (f64 DPR:$Dm)))]>;
+
+def VSQRTS : ASuI_Encode<0b11101, 0b11, 0b0001, 0b11, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (fsqrt SPR:$Sm))]>;
-def VMOVS: ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>;
+let neverHasSideEffects = 1 in {
+def VMOVD : ADuI_Encode<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>;
+
+def VMOVS : ASuI_Encode<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>;
} // neverHasSideEffects
-def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "vneg", ".f64\t$dst, $a",
- [(set DPR:$dst, (fneg (f64 DPR:$a)))]>;
-
-def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,(outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "vneg", ".f32\t$dst, $a",
- [(set SPR:$dst, (fneg SPR:$a))]>;
-
-def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a",
- [(set DPR:$dst, (fsqrt (f64 DPR:$a)))]>;
-
-def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a",
- [(set SPR:$dst, (fsqrt SPR:$a))]>;
-
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
//
-def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- IIC_fpMOVSI, "vmov", "\t$dst, $src",
- [(set GPR:$dst, (bitconvert SPR:$src))]>;
-
-def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- IIC_fpMOVIS, "vmov", "\t$dst, $src",
- [(set SPR:$dst, (bitconvert GPR:$src))]>;
+def VMOVRS : AVConv2I<0b11100001, 0b1010,
+ (outs GPR:$Rt), (ins SPR:$Sn),
+ IIC_fpMOVSI, "vmov", "\t$Rt, $Sn",
+ [(set GPR:$Rt, (bitconvert SPR:$Sn))]> {
+ // Instruction operands.
+ bits<4> Rt;
+ bits<5> Sn;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
+
+def VMOVSR : AVConv4I<0b11100000, 0b1010,
+ (outs SPR:$Sn), (ins GPR:$Rt),
+ IIC_fpMOVIS, "vmov", "\t$Sn, $Rt",
+ [(set SPR:$Sn, (bitconvert GPR:$Rt))]> {
+ // Instruction operands.
+ bits<5> Sn;
+ bits<4> Rt;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Sn{4-1};
+ let Inst{7} = Sn{0};
+ let Inst{15-12} = Rt;
+
+ let Inst{6-5} = 0b00;
+ let Inst{3-0} = 0b0000;
+}
let neverHasSideEffects = 1 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
- (outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
- IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
+ (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm),
+ IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm",
[/* FIXME: Can't write pattern for multiple result instr*/]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
let Inst{7-6} = 0b00;
}
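
The VMOVRS/VMOVSR definitions above show the operand-packing convention used
throughout this patch: a single-precision register number Sn is split with its
top four bits in Inst{19-16} and its low bit in the N bit (Inst{7}), while a
double-precision number Dm keeps its low four bits in Inst{3-0} and its high
bit in the M bit (Inst{5}). A standalone C++ sketch of that packing
(illustrative only; encodeSn/encodeDm are hypothetical names, not LLVM code):

  #include <cstdint>
  #include <cstdio>

  // Pack an S-register number (0-31): Sn{4-1} -> Inst{19-16}, Sn{0} -> Inst{7}.
  uint32_t encodeSn(uint32_t Inst, unsigned Sn) {
    Inst |= ((Sn >> 1) & 0xF) << 16; // Vn field
    Inst |= (Sn & 0x1) << 7;         // N bit
    return Inst;
  }

  // Pack a D-register number (0-31): Dm{3-0} -> Inst{3-0}, Dm{4} -> Inst{5}.
  uint32_t encodeDm(uint32_t Inst, unsigned Dm) {
    Inst |= Dm & 0xF;                // Vm field
    Inst |= ((Dm >> 4) & 0x1) << 5;  // M bit
    return Inst;
  }

  int main() {
    printf("s17 -> %#x\n", encodeSn(0, 17)); // 0x80080: Vn = 0b1000, N = 1
    printf("d17 -> %#x\n", encodeDm(0, 17)); // 0x21:    Vm = 0b0001, M = 1
    return 0;
  }

The asymmetry mirrors the A32 VFP layout: the extra register bit added for the
32-register banks lives in the D/N/M bits, so the older 4-bit fields keep
their meaning.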
@@ -333,10 +534,21 @@
// FMDLR: GPR -> SPR
def VMOVDRR : AVConv5I<0b11000100, 0b1011,
- (outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2",
- [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> {
- let Inst{7-6} = 0b00;
+ (outs DPR:$Dm), (ins GPR:$Rt, GPR:$Rt2),
+ IIC_fpMOVID, "vmov", "\t$Dm, $Rt, $Rt2",
+ [(set DPR:$Dm, (arm_fmdrr GPR:$Rt, GPR:$Rt2))]> {
+ // Instruction operands.
+ bits<5> Dm;
+ bits<4> Rt;
+ bits<4> Rt2;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Rt;
+ let Inst{19-16} = Rt2;
+
+ let Inst{7-6} = 0b00;
}
let neverHasSideEffects = 1 in
@@ -350,104 +562,171 @@
// FMRDH: SPR -> GPR
// FMRDL: SPR -> GPR
// FMRRS: SPR -> GPR
-// FMRX : SPR system reg -> GPR
-
+// FMRX: SPR system reg -> GPR
// FMSRR: GPR -> SPR
-
-// FMXR: GPR -> VFP Sstem reg
+// FMXR: GPR -> VFP system reg
-// Int to FP:
+// Int -> FP:
-def VSITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
- (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a",
- [(set DPR:$dst, (f64 (arm_sitof SPR:$a)))]> {
+class AVConv1IDs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+}
+
+class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+                         bits<4> opcod4, dag oops, dag iops,
+                         InstrItinClass itin, string opc, string asm,
+                         list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_sitof SPR:$Sm)))]> {
let Inst{7} = 1; // s32
}
-def VSITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
- (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a",
- [(set SPR:$dst, (arm_sitof SPR:$a))]> {
+def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+                                (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
let Inst{7} = 1; // s32
}
-def VUITOD : AVConv1I<0b11101, 0b11, 0b1000, 0b1011,
- (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a",
- [(set DPR:$dst, (f64 (arm_uitof SPR:$a)))]> {
+def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
+ (outs DPR:$Dd), (ins SPR:$Sm),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm",
+ [(set DPR:$Dd, (f64 (arm_uitof SPR:$Sm)))]> {
let Inst{7} = 0; // u32
}
-def VUITOS : AVConv1In<0b11101, 0b11, 0b1000, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a",
- [(set SPR:$dst, (arm_uitof SPR:$a))]> {
+def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
let Inst{7} = 0; // u32
}
-// FP to Int:
-// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+// FP -> Int:
+
+class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
+
+class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3,
+ bits<4> opcod4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1In<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm,
+ pattern> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<5> Sm;
+
+ // Encode instruction operands.
+ let Inst{3-0} = Sm{4-1};
+ let Inst{5} = Sm{0};
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+}
-def VTOSIZD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a",
- [(set SPR:$dst, (arm_ftosi (f64 DPR:$a)))]> {
+// Always set Z bit in the instruction, i.e. "round towards zero" variants.
+def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftosi (f64 DPR:$Dm)))]> {
let Inst{7} = 1; // Z bit
}
-def VTOSIZS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a",
- [(set SPR:$dst, (arm_ftosi SPR:$a))]> {
+def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
}
-def VTOUIZD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a",
- [(set SPR:$dst, (arm_ftoui (f64 DPR:$a)))]> {
+def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm",
+ [(set SPR:$Sd, (arm_ftoui (f64 DPR:$Dm)))]> {
let Inst{7} = 1; // Z bit
}
-def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a",
- [(set SPR:$dst, (arm_ftoui SPR:$a))]> {
+def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
// For disassembly only.
-
-def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+let Uses = [FPSCR] in {
+// FIXME: Verify encoding after integrated assembler is working.
+def VTOSIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".s32.f64\t$Sd, $Dm",
+                                [(set SPR:$Sd, (int_arm_vcvtr (f64 DPR:$Dm)))]> {
let Inst{7} = 0; // Z bit
}
-def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".s32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtr SPR:$Sm))]> {
let Inst{7} = 0; // Z bit
}
-def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
- (outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
+ (outs SPR:$Sd), (ins DPR:$Dm),
+ IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm",
+                                [(set SPR:$Sd, (int_arm_vcvtru (f64 DPR:$Dm)))]> {
let Inst{7} = 0; // Z bit
}
-def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
- (outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
+ (outs SPR:$Sd), (ins SPR:$Sm),
+ IIC_fpCVTSI, "vcvtr", ".u32.f32\t$Sd, $Sm",
+ [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> {
let Inst{7} = 0; // Z bit
}
+}
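
In C terms, the Z-bit split is the difference between a truncating cast and
C99's lrint(), which honors the current rounding mode the way the vcvtr
variants honor FPSCR. A small illustration (assuming the default
round-to-nearest mode):

  #include <cmath>
  #include <cstdio>

  int main() {
    double d = 2.7;
    // Z = 1 ("vcvt"): always round toward zero, like a C cast.
    printf("truncated:    %d\n", (int)d);    // 2
    // Z = 0 ("vcvtr"): use the current rounding mode, like lrint().
    printf("current mode: %ld\n", lrint(d)); // 3
    return 0;
  }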
// Convert between floating-point and fixed-point
// Data type for fixed-point naming convention:
@@ -460,6 +739,7 @@
// FP to Fixed-Point:
+let isCodeGenOnly = 1 in {
def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
@@ -499,9 +779,11 @@
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+} // End of 'let isCodeGenOnly = 1 in'
// Fixed-Point to FP:
+let isCodeGenOnly = 1 in {
def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
@@ -541,6 +823,7 @@
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+} // End of 'let isCodeGenOnly = 1 in'
} // End of 'let Constraints = "$src = $dst" in'
@@ -548,91 +831,129 @@
// FP FMA Operations.
//
-def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
+class ADbI_vmlX_Encode<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin, string opc,
+ string asm, list<dag> pattern>
+ : ADbI_vmlX<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<5> Dn;
+ bits<5> Dm;
+
+ // Encode instruction operands.
+ let Inst{19-16} = Dn{3-0};
+ let Inst{7} = Dn{4};
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+ let Inst{3-0} = Dm{3-0};
+ let Inst{5} = Dm{4};
+}
+
+def VMLAD : ADbI_vmlX_Encode<0b11100, 0b00, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd (fmul DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">;
+
+def VMLAS : ASbIn_Encode<0b11100, 0b00, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd (fmul SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">;
+
+def : Pat<(fadd DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
+ (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+def : Pat<(fadd SPR:$dstin, (fmul SPR:$a, SPR:$b)),
+ (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+
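The patterns inside VMLAD/VMLAS match (fadd (fmul a, b), acc); the two Pat
records above pick up the commuted (fadd acc, (fmul a, b)) shape. In source
terms, both of the following should be able to select vmla when VFP rather
than NEON is used for floating point (illustrative C++; actual selection
depends on the subtarget and the DontUseNEONForFP predicate):

  // Multiply-accumulate written in both operand orders; either shape may
  // reach instruction selection depending on how the DAG was built.
  double mac1(double acc, double a, double b) { return a * b + acc; }
  double mac2(double acc, double a, double b) { return acc + a * b; }
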
+def VMLSD : ADbI_vmlX_Encode<0b11100, 0b00, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd (fneg (fmul DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">;
+
+def VMLSS : ASbIn_Encode<0b11100, 0b00, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd (fneg (fmul SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">;
def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
- (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
- [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
- (f64 DPR:$dstin)))]>,
- RegConstraint<"$dstin = $dst">;
-
-def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
- (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b",
- [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
- RegConstraint<"$dstin = $dst">;
+def VNMLAD : ADbI_vmlX_Encode<0b11100, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmla", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub (fneg (fmul DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">;
+
+def VNMLAS : ASbI_Encode<0b11100, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmla", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub (fneg (fmul SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">;
+
+def : Pat<(fsub (fneg (fmul DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+def : Pat<(fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+
+def VNMLSD : ADbI_vmlX_Encode<0b11100, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub (fmul DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">;
+
+def VNMLSS : ASbI_Encode<0b11100, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub (fmul SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">;
+
+def : Pat<(fsub (fmul DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+def : Pat<(fsub (fmul SPR:$a, SPR:$b), SPR:$dstin),
+ (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+
//===----------------------------------------------------------------------===//
// FP Conditional moves.
//
let neverHasSideEffects = 1 in {
-def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
- (outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
- [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
-
-def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0,
- (outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "vmov", ".f32\t$dst, $true",
- [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
-
-def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0,
- (outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "vneg", ".f64\t$dst, $true",
- [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
-
-def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
- (outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
- [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
- RegConstraint<"$false = $dst">;
+def VMOVDcc : ADuI_Encode<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm",
+ [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VMOVScc : ASuI_Encode<0b11101, 0b11, 0b0000, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm",
+ [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
+
+def VNEGDcc : ADuI_Encode<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
+ IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm",
+ [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
+ RegConstraint<"$Dn = $Dd">;
+
+def VNEGScc : ASuI_Encode<0b11101, 0b11, 0b0001, 0b01, 0,
+ (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+ IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
+ [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
+ RegConstraint<"$Sn = $Sd">;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -644,64 +965,99 @@
let Defs = [CPSR], Uses = [FPSCR] in
def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
"\tapsr_nzcv, fpscr",
- [(arm_fmstat)]> {
+ [(arm_fmstat)]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{15-12} = 0b1111;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
let Inst{4} = 1;
+ let Inst{3-0} = 0b0000;
}
-// FPSCR <-> GPR (for disassembly only)
+// FPSCR <-> GPR
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : VFPAI<(outs GPR:$Rt), (ins), VFPMiscFrm, IIC_fpSTAT,
+ "vmrs", "\t$Rt, fpscr",
+ [(set GPR:$Rt, (int_arm_get_fpscr))]> {
+ // Instruction operand.
+ bits<4> Rt;
+
+ // Encode instruction operand.
+ let Inst{15-12} = Rt;
-let neverHasSideEffects = 1 in {
-let Uses = [FPSCR] in {
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
- "\t$dst, fpscr",
- [/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
+ let Inst{6-5} = 0b00;
let Inst{4} = 1;
-}
+ let Inst{3-0} = 0b0000;
}
-let Defs = [FPSCR] in {
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
- "\tfpscr, $src",
- [/* For disassembly only; pattern left blank */]> {
+let Defs = [FPSCR] in
+def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
+ "vmsr", "\tfpscr, $src",
+ [(int_arm_set_fpscr GPR:$src)]> {
+ // Instruction operand.
+ bits<4> src;
+
+ // Encode instruction operand.
+ let Inst{15-12} = src;
+
let Inst{27-20} = 0b11101110;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-} // neverHasSideEffects
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
-def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
+def FCONSTD : VFPAI<(outs DPR:$Dd), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_fpUNA64,
- "vmov", ".f64\t$dst, $imm",
- [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ "vmov", ".f64\t$Dd, $imm",
+ [(set DPR:$Dd, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Dd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Dd{3-0};
+ let Inst{22} = Dd{4};
+ let Inst{19} = imm{31};
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 1;
+ let Inst{8} = 1; // Double precision.
let Inst{7-4} = 0b0000;
}
-def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
- VFPMiscFrm, IIC_fpUNA32,
- "vmov", ".f32\t$dst, $imm",
- [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
+ VFPMiscFrm, IIC_fpUNA32,
+ "vmov", ".f32\t$Sd, $imm",
+ [(set SPR:$Sd, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ // Instruction operands.
+ bits<5> Sd;
+ bits<32> imm;
+
+ // Encode instruction operands.
+ let Inst{15-12} = Sd{4-1};
+ let Inst{22} = Sd{0};
+ let Inst{19} = imm{31}; // The immediate is handled as a double.
+ let Inst{18-16} = imm{22-20};
+ let Inst{3-0} = imm{19-16};
+
+ // Encode remaining instruction bits.
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 0;
+ let Inst{8} = 0; // Single precision.
let Inst{7-4} = 0b0000;
}
}
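
FCONSTD/FCONSTS scatter the eight immediate bits abcdefgh across Inst{19},
Inst{18-16}, and Inst{3-0}, taken straight from the high word of the IEEE-754
double: the sign, the low exponent bits, and the top mantissa bits. A
standalone sketch of recovering that imm8 from a double, following the ARM
ARM's VFPExpandImm layout (getVFP3Imm8 is a hypothetical helper, not the
vfp_f64imm predicate itself):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Return true and set Imm8 = abcdefgh if D is exactly representable as a
  // VFP3 immediate: +/- (16..31)/16 * 2^n with n in [-3, 4].
  bool getVFP3Imm8(double D, uint8_t &Imm8) {
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits));
    if (Bits & ((1ULL << 48) - 1))    // low 48 mantissa bits must be zero
      return false;
    uint32_t Hi   = uint32_t(Bits >> 32);
    uint32_t A    = (Hi >> 31) & 1;   // sign
    uint32_t B    = (Hi >> 22) & 1;   // replicated exponent bit
    if (((Hi >> 30) & 1) == B)        // exponent bit 10 must be NOT(b)
      return false;
    if (((Hi >> 22) & 0xFF) != (B ? 0xFFu : 0u)) // bits 9-2 must all be b
      return false;
    uint32_t CD   = (Hi >> 20) & 3;   // exponent bits 1-0
    uint32_t EFGH = (Hi >> 16) & 0xF; // top four mantissa bits
    Imm8 = uint8_t((A << 7) | (B << 6) | (CD << 4) | EFGH);
    return true;
  }

  int main() {
    uint8_t Imm;
    if (getVFP3Imm8(1.0, Imm))  printf("1.0  -> %#x\n", Imm); // 0x70
    if (getVFP3Imm8(-2.0, Imm)) printf("-2.0 -> %#x\n", Imm); // 0x80
    if (!getVFP3Imm8(0.1, Imm)) printf("0.1 is not encodable\n");
    return 0;
  }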
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMJITInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMJITInfo.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMJITInfo.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMJITInfo.cpp Tue Oct 26 19:48:03 2010
@@ -290,7 +290,7 @@
*((intptr_t*)RelocPos) |= ResultPtr;
// Set register Rn to PC.
*((intptr_t*)RelocPos) |=
- ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+ getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
break;
}
case ARM::reloc_arm_pic_jt:
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMLoadStoreOptimizer.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMLoadStoreOptimizer.cpp Tue Oct 26 19:48:03 2010
@@ -57,7 +57,7 @@
namespace {
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -130,7 +130,7 @@
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
- case ARM::LDR:
+ case ARM::LDRi12:
++NumLDMGened;
return ARM::LDM;
case ARM::STR:
@@ -166,7 +166,7 @@
}
static bool isi32Load(unsigned Opc) {
- return Opc == ARM::LDR || isT2i32Load(Opc);
+ return Opc == ARM::LDRi12 || isT2i32Load(Opc);
}
static bool isT2i32Store(unsigned Opc) {
@@ -193,20 +193,17 @@
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
- if (isAM4 && Offset == 4) {
- if (isThumb2)
- // Thumb2 does not support ldmib / stmib.
- return false;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
Mode = ARM_AM::ib;
- } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
- if (isThumb2)
- // Thumb2 does not support ldmda / stmda.
- return false;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
Mode = ARM_AM::da;
- } else if (isAM4 && Offset == -4 * (int)NumRegs) {
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ else if (Offset != 0) {
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
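
The rewritten control flow folds the old AM4/AM5 split into one decision: the
IB and DA submodes exist only for core-register LDM/STM in ARM mode, and VFP
VLDM/VSTM additionally cannot use DB without base writeback. A simplified
standalone restatement (pickSubMode is an illustrative name; the real code
falls through to materializing a new base register for other non-zero
offsets):

  #include <cstdio>

  enum AMSubMode { ia, ib, da, db };

  // Offset is the lowest address accessed relative to the base register;
  // NumRegs is the number of 4-byte registers transferred.
  AMSubMode pickSubMode(int Offset, int NumRegs, bool isNotVFP, bool isThumb2) {
    bool haveIBAndDA = isNotVFP && !isThumb2;
    if (Offset == 4 && haveIBAndDA)
      return ib;                        // base+4 .. base+4*NumRegs
    if (Offset == -4 * NumRegs + 4 && haveIBAndDA)
      return da;                        // range ends at the base address
    if (Offset == -4 * NumRegs && isNotVFP)
      return db;                        // classic push-style layout
    return ia;                          // Offset == 0; others need a new base
  }

  int main() {
    printf("%d %d %d\n",
           pickSubMode(4, 3, true, false),    // ib
           pickSubMode(-8, 3, true, false),   // da
           pickSubMode(-12, 3, true, false)); // db
    return 0;
  }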
@@ -246,18 +243,12 @@
BaseKill = true;  // New base is always killed right after its use.
}
- bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
Opcode = getLoadStoreMultipleOpcode(Opcode);
- MachineInstrBuilder MIB = (isAM4)
- ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
- : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
- .addImm(Pred).addReg(PredReg);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
@@ -333,6 +324,7 @@
if (KilledRegs.count(Reg)) {
unsigned j = Killer[Reg];
memOps[j].MBBI->getOperand(0).setIsKill(false);
+ memOps[j].isKill = false;
}
}
MBB.erase(memOps[i].MBBI);
@@ -348,7 +340,7 @@
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned insertAfter = SIndex;
@@ -357,7 +349,7 @@
const MachineOperand &PMO = Loc->getOperand(0);
unsigned PReg = PMO.getReg();
unsigned PRegNum = PMO.isUndef() ? UINT_MAX
- : ARMRegisterInfo::getRegisterNumbering(PReg);
+ : getARMRegisterNumbering(PReg);
unsigned Count = 1;
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
@@ -365,13 +357,13 @@
const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX
- : ARMRegisterInfo::getRegisterNumbering(Reg);
- // AM4 - register numbers in ascending order.
- // AM5 - consecutive register numbers in ascending order.
- // Can only do up to 16 double-word registers per insn.
+ : getARMRegisterNumbering(Reg);
+ // Register numbers must be in ascending order. For VFP, the registers
+ // must also be consecutive and there is a limit of 16 double-word
+ // registers per instruction.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum)
+ ((isNotVFP && RegNum > PRegNum)
|| ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
@@ -409,7 +401,7 @@
return false;
// Make sure the offset fits in 8 bits.
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
@@ -433,7 +425,7 @@
MI->getOpcode() != ARM::ADDri)
return false;
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
@@ -448,7 +440,7 @@
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return 0;
- case ARM::LDR:
+ case ARM::LDRi12:
case ARM::STR:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
@@ -464,12 +456,12 @@
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
- return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMS:
case ARM::VSTMS:
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
case ARM::VLDMD:
case ARM::VSTMD:
- return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
}
}
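
The updated getLSMultipleTransferSize derives the byte count from the length
of the instruction's operand list instead of the old AM5 offset field: the
register list of a load/store-multiple MachineInstr is carried as trailing
variadic operands beyond those named in its MCInstrDesc. A hedged restatement
of the arithmetic (the operand counts in main are made up for illustration):

  #include <cstdio>

  // BytesPerReg is 4 for LDM/STM/VLDMS/VSTMS and 8 for VLDMD/VSTMD; the +1
  // matches the formula above and is assumed to account for how the
  // descriptor counts the first register of the list.
  unsigned lsMultipleTransferSize(unsigned NumOperands,
                                  unsigned DescNumOperands,
                                  unsigned BytesPerReg) {
    return (NumOperands - DescNumOperands + 1) * BytesPerReg;
  }

  int main() {
    // Hypothetical VLDMD whose MI has 7 operands against 5 in its
    // descriptor: a three-register list moving 24 bytes.
    printf("%u bytes\n", lsMultipleTransferSize(7, 5, 8));
    return 0;
  }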
@@ -512,26 +504,17 @@
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
- Opcode == ARM::STM || Opcode == ARM::t2STM);
bool DoMerge = false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- unsigned Offset = 0;
- if (isAM4) {
- // Can't use an updating ld/st if the base register is also a dest
- // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).getReg() == Base)
- return false;
- }
- Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
- } else {
- // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
- Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
- Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
}
+ Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -539,22 +522,14 @@
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
- if (isAM4) {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::db;
- } else if (isAM4 && Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::da;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- }
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
}
if (DoMerge)
MBB.erase(PrevMBBI);
@@ -566,19 +541,12 @@
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
- if (isAM4) {
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
}
if (DoMerge) {
if (NextMBBI == I) {
@@ -595,16 +563,9 @@
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
- .addReg(Base, getKillRegState(BaseKill));
- if (isAM4) {
- // [t2]LDM_UPD, [t2]STM_UPD
- MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
- .addImm(Pred).addReg(PredReg);
- } else {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
- MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
- .addImm(Pred).addReg(PredReg);
- }
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode))
+ .addImm(Pred).addReg(PredReg);
// Transfer the rest of operands.
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
@@ -617,7 +578,7 @@
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
- case ARM::LDR: return ARM::LDR_PRE;
+ case ARM::LDRi12: return ARM::LDR_PRE;
case ARM::STR: return ARM::STR_PRE;
case ARM::VLDRS: return ARM::VLDMS_UPD;
case ARM::VLDRD: return ARM::VLDMD_UPD;
@@ -636,7 +597,7 @@
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
- case ARM::LDR: return ARM::LDR_POST;
+ case ARM::LDRi12: return ARM::LDR_POST;
case ARM::STR: return ARM::STR_POST;
case ARM::VLDRS: return ARM::VLDMS_UPD;
case ARM::VLDRD: return ARM::VLDMD_UPD;
@@ -668,14 +629,18 @@
DebugLoc dl = MI->getDebugLoc();
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
- bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
- if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
+ bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STR);
+ // FIXME: This special handling of LDRi12 is hackery until all of the ARM
+ // LDR/STR insns are moved away from the addrmode2 mega-instruction to
+ // the split (LDRi12/LDRrs) style instructions.
+ if (Opcode == ARM::LDRi12 || isT2i32Load(Opcode) || isT2i32Store(Opcode))
+ if (MI->getOperand(2).getImm() != 0)
+ return false;
+ if (isAM2 && Opcode != ARM::LDRi12
+ && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
return false;
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
return false;
- if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
- if (MI->getOperand(2).getImm() != 0)
- return false;
bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
@@ -736,11 +701,10 @@
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
- Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
- (isDPR ? 2 : 1));
+ Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
+ ARM_AM::db : ARM_AM::ia);
else if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
else
@@ -748,6 +712,9 @@
if (isAM5) {
// VLDM[SD}_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
MachineOperand &MO = MI->getOperand(0);
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
@@ -819,7 +786,6 @@
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
- case ARM::LDR:
case ARM::STR:
return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
case ARM::VLDRS:
@@ -828,6 +794,7 @@
case ARM::VLDRD:
case ARM::VSTRD:
return MI->getOperand(1).isReg();
+ case ARM::LDRi12:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -855,14 +822,15 @@
static int getMemoryOpOffset(const MachineInstr *MI) {
int Opcode = MI->getOpcode();
- bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ bool isAM2 = Opcode == ARM::STR;
bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
unsigned NumOperands = MI->getDesc().getNumOperands();
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
- Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8)
+ Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
+ Opcode == ARM::LDRi12)
return OffField;
int Offset = isAM2
@@ -981,7 +949,7 @@
assert((!isT2 || !OffReg) &&
"Thumb2 ldrd / strd does not encode offset register!");
unsigned NewOpc = (isLd)
- ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDR)
+ ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
: (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STR);
DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and base register is killed, it may have been
@@ -1233,6 +1201,7 @@
unsigned NewOpc = isThumb2 ? ARM::t2LDM_RET : ARM::LDM_RET;
PrevMI->setDesc(TII->get(NewOpc));
MO.setReg(ARM::PC);
+ PrevMI->copyImplicitOps(&*MBBI);
MBB.erase(MBBI);
return true;
}
@@ -1268,7 +1237,7 @@
namespace {
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetData *TD;
const TargetInstrInfo *TII;
@@ -1378,7 +1347,7 @@
// FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
- if (Opcode == ARM::LDR)
+ if (Opcode == ARM::LDRi12)
NewOpc = ARM::LDRD;
else if (Opcode == ARM::STR)
NewOpc = ARM::STRD;
@@ -1394,11 +1363,11 @@
return false;
// Make sure the offset registers match.
- if (!isT2 &&
+ if (!isT2 && Opcode != ARM::LDRi12 &&
(Op0->getOperand(2).getReg() != Op1->getOperand(2).getReg()))
return false;
- // Must sure the base address satisfies i64 ld / st alignment requirement.
+ // Make sure the base address satisfies i64 ld / st alignment requirement.
if (!Op0->hasOneMemOperand() ||
!(*Op0->memoperands_begin())->getValue() ||
(*Op0->memoperands_begin())->isVolatile())
@@ -1407,7 +1376,7 @@
unsigned Align = (*Op0->memoperands_begin())->getAlignment();
const Function *Func = MF->getFunction();
unsigned ReqAlign = STI->hasV6Ops()
- ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext()))
+ ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
: 8; // Pre-v6 need 8-byte align
if (Align < ReqAlign)
return false;
@@ -1441,7 +1410,7 @@
if (EvenReg == OddReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- if (!isT2)
+ if (!isT2 && Opcode != ARM::LDRi12)
OffReg = Op0->getOperand(2).getReg();
Pred = llvm::getInstrPredicate(Op0, PredReg);
dl = Op0->getDebugLoc();
@@ -1549,8 +1518,12 @@
.addReg(EvenReg, RegState::Define)
.addReg(OddReg, RegState::Define)
.addReg(BaseReg);
+ // For now, we're converting from LDRi12 to an insn that still
+ // uses addrmode2, so we need an explicit offset reg. It should
+          // always be reg0 since we're transforming LDRi12s. The old code
+          // was just being paranoid in allowing for anything else.
if (!isT2)
- MIB.addReg(OffReg);
+ MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
++NumLDRDFormed;
} else {
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.cpp?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.cpp (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.cpp Tue Oct 26 19:48:03 2010
@@ -12,52 +12,72 @@
//
//===----------------------------------------------------------------------===//
+#include "ARM.h"
#include "ARMMCInstLower.h"
-//#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-//#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
using namespace llvm;
-
-#if 0
-const ARMSubtarget &ARMMCInstLower::getSubtarget() const {
- return AsmPrinter.getSubtarget();
+MCSymbol *ARMMCInstLower::GetGlobalAddressSymbol(const GlobalValue *GV) const {
+ return Printer.Mang->getSymbol(GV);
}
-MachineModuleInfoMachO &ARMMCInstLower::getMachOMMI() const {
- assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
- return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
-}
-#endif
+const MCSymbolRefExpr *ARMMCInstLower::
+GetSymbolRef(const MachineOperand &MO) const {
+ assert(MO.isGlobal() && "Isn't a global address reference?");
+
+ const MCSymbolRefExpr *SymRef;
+ const MCSymbol *Symbol = GetGlobalAddressSymbol(MO.getGlobal());
-MCSymbol *ARMMCInstLower::
-GetGlobalAddressSymbol(const MachineOperand &MO) const {
- // FIXME: HANDLE PLT references how??
switch (MO.getTargetFlags()) {
default: assert(0 && "Unknown target flag on GV operand");
- case 0: break;
+ case 0:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ break;
+ case ARMII::MO_LO16:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_LO16, Ctx);
+ break;
+ case ARMII::MO_HI16:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_HI16, Ctx);
+ break;
+ case ARMII::MO_PLT:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx);
+ break;
}
-
- return Printer.Mang->getSymbol(MO.getGlobal());
+
+ return SymRef;
}
-MCSymbol *ARMMCInstLower::
+const MCSymbolRefExpr *ARMMCInstLower::
GetExternalSymbolSymbol(const MachineOperand &MO) const {
- // FIXME: HANDLE PLT references how??
+ const MCSymbolRefExpr *SymRef;
+ const MCSymbol *Symbol = Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+
switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
- case 0: break;
+ default: assert(0 && "Unknown target flag on external symbol operand");
+ case 0:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ break;
+ case ARMII::MO_LO16:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_LO16, Ctx);
+ break;
+ case ARMII::MO_HI16:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_HI16, Ctx);
+ break;
+ case ARMII::MO_PLT:
+ SymRef = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx);
+ break;
}
-
- return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+
+ return SymRef;
}
@@ -67,13 +87,12 @@
SmallString<256> Name;
raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI"
<< Printer.getFunctionNumber() << '_' << MO.getIndex();
-
-#if 0
+
switch (MO.getTargetFlags()) {
- default: llvm_unreachable("Unknown target flag on GV operand");
+ default: assert(0 && "Unknown target flag on jump table operand");
+ case 0: break;
}
-#endif
-
+
// Create a symbol for the name.
return Ctx.GetOrCreateSymbol(Name.str());
}
@@ -83,29 +102,38 @@
SmallString<256> Name;
raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI"
<< Printer.getFunctionNumber() << '_' << MO.getIndex();
-
-#if 0
+
switch (MO.getTargetFlags()) {
- default: llvm_unreachable("Unknown target flag on GV operand");
+ default: assert(0 && "Unknown target flag on CPI operand");
+ case 0: break;
}
-#endif
-
+
// Create a symbol for the name.
return Ctx.GetOrCreateSymbol(Name.str());
}
-
+
MCOperand ARMMCInstLower::
LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
-
-#if 0
+
switch (MO.getTargetFlags()) {
- default: llvm_unreachable("Unknown target flag on GV operand");
+ default: assert(0 && "Unknown target flag on Symbol operand");
+ case 0: break;
}
-#endif
-
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand ARMMCInstLower::
+LowerSymbolRefOperand(const MachineOperand &MO,
+ const MCSymbolRefExpr *Sym) const {
+ const MCExpr *Expr = Sym;
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
@@ -116,18 +144,18 @@
void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
-
+
MCOperand MCOp;
switch (MO.getType()) {
default:
MI->dump();
assert(0 && "unknown operand type");
case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit()) continue;
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue;
assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
@@ -139,10 +167,10 @@
MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ MCOp = LowerSymbolRefOperand(MO, GetSymbolRef(MO));
break;
case MachineOperand::MO_ExternalSymbol:
- MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ MCOp = LowerSymbolRefOperand(MO, GetExternalSymbolSymbol(MO));
break;
case MachineOperand::MO_JumpTableIndex:
MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO));
@@ -154,9 +182,15 @@
MCOp = LowerSymbolOperand(MO, Printer.GetBlockAddressSymbol(
MO.getBlockAddress()));
break;
+ case MachineOperand::MO_FPImmediate:
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
}
-
+
OutMI.addOperand(MCOp);
}
-
+
}
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMMCInstLower.h Tue Oct 26 19:48:03 2010
@@ -14,17 +14,19 @@
namespace llvm {
class AsmPrinter;
+ class GlobalValue;
class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
class MCSymbol;
+ class MCSymbolRefExpr;
class MachineInstr;
class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;
//class ARMSubtarget;
-
+
/// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY ARMMCInstLower {
MCContext &Ctx;
@@ -35,16 +37,20 @@
public:
ARMMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer)
: Ctx(ctx), Mang(mang), Printer(printer) {}
-
+
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
//MCSymbol *GetPICBaseSymbol() const;
- MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
- MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCSymbol *GetGlobalAddressSymbol(const GlobalValue *GV) const;
+ const MCSymbolRefExpr *GetSymbolRef(const MachineOperand &MO) const;
+ const MCSymbolRefExpr *GetExternalSymbolSymbol(const MachineOperand &MO)
+ const;
MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const;
MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const;
+ MCOperand LowerSymbolRefOperand(const MachineOperand &MO,
+ const MCSymbolRefExpr *Expr) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
-
+
/*
private:
MachineModuleInfoMachO &getMachOMMI() const;
Modified: llvm/branches/wendling/eh/lib/Target/ARM/ARMMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/wendling/eh/lib/Target/ARM/ARMMachineFunctionInfo.h?rev=117425&r1=117424&r2=117425&view=diff
==============================================================================
--- llvm/branches/wendling/eh/lib/Target/ARM/ARMMachineFunctionInfo.h (original)
+++ llvm/branches/wendling/eh/lib/Target/ARM/ARMMachineFunctionInfo.h Tue Oct 26 19:48:03 2010
@@ -43,6 +43,10 @@
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
  /// LRSpilledForFarJump - True if the LR register has been spilled to
  /// enable a far jump.
bool LRSpilledForFarJump;
@@ -51,28 +55,22 @@
/// spill stack offset.
unsigned FramePtrSpillOffset;
- /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved
- /// register spills areas. For Mac OS X:
+ /// GPRCSOffset, DPRCSOffset - Starting offset of callee saved register
+ /// spills areas (excluding R9 for Mac OS X):
///
- /// GPR callee-saved (1) : r4, r5, r6, r7, lr
- /// --------------------------------------------
- /// GPR callee-saved (2) : r8, r10, r11
+ /// GPR callee-saved (1) : r4, r5, r6, r7, r8, r9, r10, r11, lr
/// --------------------------------------------
/// DPR callee-saved : d8 - d15
- unsigned GPRCS1Offset;
- unsigned GPRCS2Offset;
+ unsigned GPRCSOffset;
unsigned DPRCSOffset;
- /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
- /// areas.
- unsigned GPRCS1Size;
- unsigned GPRCS2Size;
+ /// GPRCSSize, DPRCSSize - Sizes of callee saved register spills areas.
+ unsigned GPRCSSize;
unsigned DPRCSSize;
- /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices
- /// which belong to these spill areas.
- BitVector GPRCS1Frames;
- BitVector GPRCS2Frames;
+ /// GPRCSFrames, DPRCSFrames - Keeps track of frame indices which belong
+ /// to these spill areas.
+ BitVector GPRCSFrames;
BitVector DPRCSFrames;
/// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers.
@@ -95,22 +93,22 @@
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ FramePtrSpillOffset(0), GPRCSOffset(0), DPRCSOffset(0),
+ GPRCSSize(0), DPRCSSize(0),
+ GPRCSFrames(0), DPRCSFrames(0),
JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
+ FramePtrSpillOffset(0), GPRCSOffset(0), DPRCSOffset(0),
+ GPRCSSize(0), DPRCSSize(0),
+ GPRCSFrames(32), DPRCSFrames(32),
SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
HasITBlocks(false) {}
@@ -125,37 +123,31 @@
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
- unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
- unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
+ unsigned getGPRCalleeSavedAreaOffset() const { return GPRCSOffset; }
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
- void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; }
- void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
+ void setGPRCalleeSavedAreaOffset(unsigned o) { GPRCSOffset = o; }
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
- unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
- unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+ unsigned getGPRCalleeSavedAreaSize() const { return GPRCSSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
- void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
- void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+ void setGPRCalleeSavedAreaSize(unsigned s) { GPRCSSize = s; }
void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; }
- bool isGPRCalleeSavedArea1Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS1Frames.size())
+ bool isGPRCalleeSavedAreaFrame(int fi) const {
+ if (fi < 0 || fi >= (int)GPRCSFrames.size())
return false;
- return GPRCS1Frames[fi];
- }
- bool isGPRCalleeSavedArea2Frame(int fi) const {
- if (fi < 0 || fi >= (int)GPRCS2Frames.size())
- return false;
- return GPRCS2Frames[fi];
+ return GPRCSFrames[fi];
}
bool isDPRCalleeSavedAreaFrame(int fi) const {
if (fi < 0 || fi >= (int)DPRCSFrames.size())
@@ -163,28 +155,16 @@
return DPRCSFrames[fi];
}
- void addGPRCalleeSavedArea1Frame(int fi) {
- if (fi >= 0) {
- int Size = GPRCS1Frames.size();
- if (fi >= Size) {
- Size *= 2;
- if (fi >= Size)
- Size = fi+1;
- GPRCS1Frames.resize(Size);
- }
- GPRCS1Frames[fi] = true;
- }
- }
- void addGPRCalleeSavedArea2Frame(int fi) {
+ void addGPRCalleeSavedAreaFrame(int fi) {
if (fi >= 0) {
- int Size = GPRCS2Frames.size();
+ int Size = GPRCSFrames.size();
if (fi >= Size) {
Size *= 2;
if (fi >= Size)
Size = fi+1;
- GPRCS2Frames.resize(Size);
+ GPRCSFrames.resize(Size);
}
- GPRCS2Frames[fi] = true;
+ GPRCSFrames[fi] = true;
}
}
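
addGPRCalleeSavedAreaFrame (and the DPR variant below it) grows the
frame-index bit vector geometrically, so repeated marking stays amortized
O(1). A standalone sketch with std::vector<bool> standing in for BitVector:

  #include <cstdio>
  #include <vector>

  // Mark frame index fi, doubling the vector when it must grow and jumping
  // straight to fi+1 when even the doubled size would not reach it.
  void addFrame(std::vector<bool> &Frames, int fi) {
    if (fi < 0)
      return;
    int Size = (int)Frames.size();
    if (fi >= Size) {
      Size *= 2;
      if (fi >= Size)
        Size = fi + 1;
      Frames.resize(Size);
    }
    Frames[fi] = true;
  }

  int main() {
    std::vector<bool> Frames(32);
    addFrame(Frames, 40);  // grows 32 -> 64
    addFrame(Frames, 200); // 64*2 = 128 < 201, so grows straight to 201
    printf("size=%zu 40=%d 200=%d\n", Frames.size(),
           (int)Frames[40], (int)Frames[200]);
    return 0;
  }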
void addDPRCalleeSavedAreaFrame(int fi) {