[llvm] r213049 - [FastISel][X86] Implement the FastLowerCall hook.
Juergen Ributzka
juergen at apple.com
Wed Jul 16 16:13:02 PDT 2014
Reapplied with a small bug fix. The issue was in the verification code that checks whether a type is legal as an argument. It accidentally allowed i1 as a legal type, which is not true, and AnalyzeCallOperands was very upset about that.
The code now falls back to SelectionDAG if any of the call arguments is of type i1.
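For reference, the shape of that guard (a minimal sketch, paraphrased rather than the verbatim follow-up patch; isTypeLegal, MVT, and CLI.OutVals are the real FastISel/LLVM names, the loop body is simplified):

  // Reject i1 call arguments up front: CC_X86 has no rule for i1, so
  // CCState::AnalyzeCallOperands would assert on it. Returning false
  // makes FastISel fall back to SelectionDAG for this call.
  for (auto *Val : CLI.OutVals) {
    MVT VT;
    if (!isTypeLegal(Val->getType(), VT) || VT == MVT::i1)
      return false;
  }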
-Juergen
On Jul 14, 2014, at 11:35 PM, Juergen Ributzka <juergen at apple.com> wrote:
> Author: ributzka
> Date: Tue Jul 15 01:35:47 2014
> New Revision: 213049
>
> URL: http://llvm.org/viewvc/llvm-project?rev=213049&view=rev
> Log:
> [FastISel][X86] Implement the FastLowerCall hook.
>
> This implements the FastLowerCall hook, which is based on the DoSelectCall
> function. The implementation is very similar, but the target-independent call
> lowering part has been factored out.
>
> This should also enable patchpoint intrinsic lowering for FastISel on X86.
>
> Related to <rdar://problem/17427052>.
>
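(For context: the factored-out, target-independent side lives in FastISel::LowerCallTo, which packages the callee, calling convention, argument values/flags, and return type into a CallLoweringInfo and then defers to the target. A rough sketch of the dispatch, not part of this diff:)

  // Target-independent driver, simplified:
  if (!FastLowerCall(CLI))
    return false;   // target punted; SelectionDAG handles the call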
> Modified:
> llvm/trunk/lib/Target/X86/X86FastISel.cpp
>
> Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=213049&r1=213048&r2=213049&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Tue Jul 15 01:35:47 2014
> @@ -74,6 +74,7 @@ public:
> const LoadInst *LI) override;
>
> bool FastLowerArguments() override;
> + bool FastLowerCall(CallLoweringInfo &CLI) override;
>
> #include "X86GenFastISel.inc"
>
> @@ -2654,18 +2655,19 @@ bool X86FastISel::X86SelectCall(const In
> return DoSelectCall(I, nullptr);
> }
>
> -static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
> - const ImmutableCallSite &CS) {
> - if (Subtarget.is64Bit())
> +static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
> + CallingConv::ID CC,
> + ImmutableCallSite *CS) {
> + if (Subtarget->is64Bit())
> return 0;
> - if (Subtarget.getTargetTriple().isOSMSVCRT())
> + if (Subtarget->getTargetTriple().isOSMSVCRT())
> return 0;
> - CallingConv::ID CC = CS.getCallingConv();
> - if (CC == CallingConv::Fast || CC == CallingConv::GHC)
> + if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
> + CC == CallingConv::HiPE)
> return 0;
> - if (!CS.paramHasAttr(1, Attribute::StructRet))
> + if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
> return 0;
> - if (CS.paramHasAttr(1, Attribute::InReg))
> + if (CS && CS->paramHasAttr(1, Attribute::InReg))
> return 0;
> return 4;
> }
> @@ -3025,7 +3027,7 @@ bool X86FastISel::DoSelectCall(const Ins
>
> // Issue CALLSEQ_END
> unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
> - const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
> + unsigned NumBytesCallee = computeBytesPoppedByCallee(Subtarget, CC, &CS);
> BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
> .addImm(NumBytes).addImm(NumBytesCallee);
>
> @@ -3107,6 +3109,395 @@ bool X86FastISel::DoSelectCall(const Ins
> return true;
> }
>
> +bool X86FastISel::FastLowerCall(CallLoweringInfo &CLI) {
> + auto &OutVals = CLI.OutVals;
> + auto &OutFlags = CLI.OutFlags;
> + auto &OutRegs = CLI.OutRegs;
> + auto &Ins = CLI.Ins;
> + auto &InRegs = CLI.InRegs;
> + CallingConv::ID CC = CLI.CallConv;
> + bool &IsTailCall = CLI.IsTailCall;
> + bool IsVarArg = CLI.IsVarArg;
> + const Value *Callee = CLI.Callee;
> + const char *SymName = CLI.SymName;
> +
> + bool Is64Bit = Subtarget->is64Bit();
> + bool IsWin64 = Subtarget->isCallingConvWin64(CC);
> +
> + // Handle only C, fastcc, webkit_js, and x86-specific calling conventions
> + // for now.
> + switch (CC) {
> + default: return false;
> + case CallingConv::C:
> + case CallingConv::Fast:
> + case CallingConv::WebKit_JS:
> + case CallingConv::X86_FastCall:
> + case CallingConv::X86_64_Win64:
> + case CallingConv::X86_64_SysV:
> + break;
> + }
> +
> + // Allow SelectionDAG isel to handle tail calls.
> + if (IsTailCall)
> + return false;
> +
> + // fastcc with -tailcallopt is intended to provide a guaranteed
> + // tail call optimization. FastISel doesn't know how to do that.
> + if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
> + return false;
> +
> + // Don't know how to handle Win64 varargs yet. Nothing special needed for
> + // x86-32. Special handling for x86-64 is implemented.
> + if (IsVarArg && IsWin64)
> + return false;
> +
> + // Don't know about inalloca yet.
> + if (CLI.CS && CLI.CS->hasInAllocaArgument())
> + return false;
> +
> + // Fast-isel doesn't know about callee-pop yet.
> + if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
> + TM.Options.GuaranteedTailCallOpt))
> + return false;
> +
> + // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
> + // instruction. This is safe because this behavior is common to all the
> + // calling conventions FastISel supports on x86.
> + for (int i = 0, e = OutVals.size(); i != e; ++i) {
> + Value *&Val = OutVals[i];
> + ISD::ArgFlagsTy Flags = OutFlags[i];
> + if (auto *CI = dyn_cast<ConstantInt>(Val)) {
> + if (CI->getBitWidth() < 32) {
> + if (Flags.isSExt())
> + Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
> + else
> + Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
> + }
> + }
> +
> + // Passing bools around ends up doing a trunc to i1 and passing it.
> + // Codegen this as an argument + "and 1".
> + if (auto *TI = dyn_cast<TruncInst>(Val)) {
> + if (TI->getType()->isIntegerTy(1) && CLI.CS &&
> + (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
> + TI->hasOneUse()) {
> + Val = cast<TruncInst>(Val)->getOperand(0);
> + unsigned ResultReg = getRegForValue(Val);
> +
> + if (!ResultReg)
> + return false;
> +
> + MVT ArgVT;
> + if (!isTypeLegal(Val->getType(), ArgVT))
> + return false;
> +
> + ResultReg =
> + FastEmit_ri(ArgVT, ArgVT, ISD::AND, ResultReg, Val->hasOneUse(), 1);
> +
> + if (!ResultReg)
> + return false;
> + UpdateValueMap(Val, ResultReg);
> + }
> + }
> + }
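(In effect, for a concrete call - the IR and asm below are illustrative, not taken from a test case:)

  // Before:   %t = trunc i32 %x to i1
  //           call void @f(i8 42, i1 %t)
  //
  // The constant i8 42 is rewritten to an i32 constant, so it can be
  // materialized with a single 32-bit mov and no extra extend. The
  // trunc feeding the i1 argument is replaced by its i32 source masked
  // to the low bit:
  //   andl $1, %reg         ; the "argument + and 1" codegen
  // and the call then uses %reg directly.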
> +
> + // Analyze operands of the call, assigning locations to each operand.
> + SmallVector<CCValAssign, 16> ArgLocs;
> + CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs,
> + CLI.RetTy->getContext());
> +
> + // Allocate shadow area for Win64
> + if (IsWin64)
> + CCInfo.AllocateStack(32, 8);
> +
> + SmallVector<MVT, 16> OutVTs;
> + for (auto *Val : OutVals) {
> + MVT VT;
> + if (!isTypeLegal(Val->getType(), VT))
> + return false;
> + OutVTs.push_back(VT);
> + }
> + CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
> +
> + // Get a count of how many bytes are to be pushed on the stack.
> + unsigned NumBytes = CCInfo.getNextStackOffset();
> +
> + // Issue CALLSEQ_START
> + unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
> + .addImm(NumBytes);
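(These pseudos bracket the whole call; a sketch of the eventual shape - exact opcodes vary by subtarget:)

  //   ADJCALLSTACKDOWN NumBytes                  ; CALLSEQ_START
  //   ...argument copies/stores, the CALL...
  //   ADJCALLSTACKUP   NumBytes, NumBytesPopped  ; CALLSEQ_END
  // Frame lowering later folds these into sub/add of %esp/%rsp, or
  // drops them entirely when the call frame is reserved up front.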
> +
> + // Walk the register/memloc assignments, inserting copies/loads.
> + const X86RegisterInfo *RegInfo =
> + static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
> + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
> + CCValAssign const &VA = ArgLocs[i];
> + const Value *ArgVal = OutVals[VA.getValNo()];
> + MVT ArgVT = OutVTs[VA.getValNo()];
> +
> + if (ArgVT == MVT::x86mmx)
> + return false;
> +
> + unsigned ArgReg = getRegForValue(ArgVal);
> + if (!ArgReg)
> + return false;
> +
> + // Promote the value if needed.
> + switch (VA.getLocInfo()) {
> + case CCValAssign::Full: break;
> + case CCValAssign::SExt: {
> + assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
> + "Unexpected extend");
> + bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
> + ArgVT, ArgReg);
> + assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
> + ArgVT = VA.getLocVT();
> + break;
> + }
> + case CCValAssign::ZExt: {
> + assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
> + "Unexpected extend");
> + bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
> + ArgVT, ArgReg);
> + assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
> + ArgVT = VA.getLocVT();
> + break;
> + }
> + case CCValAssign::AExt: {
> + assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
> + "Unexpected extend");
> + bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
> + ArgVT, ArgReg);
> + if (!Emitted)
> + Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
> + ArgVT, ArgReg);
> + if (!Emitted)
> + Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
> + ArgVT, ArgReg);
> +
> + assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
> + ArgVT = VA.getLocVT();
> + break;
> + }
> + case CCValAssign::BCvt: {
> + ArgReg = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
> + /*TODO: Kill=*/false);
> + assert(ArgReg && "Failed to emit a bitcast!");
> + ArgVT = VA.getLocVT();
> + break;
> + }
> + case CCValAssign::VExt:
> + // VExt has not been implemented, so this should be impossible to reach
> + // for now. However, fall back to SelectionDAG isel once it is implemented.
> + return false;
> + case CCValAssign::FPExt:
> + llvm_unreachable("Unexpected loc info!");
> + case CCValAssign::Indirect:
> + // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
> + // support this.
> + return false;
> + }
> +
> + if (VA.isRegLoc()) {
> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
> + TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
> + OutRegs.push_back(VA.getLocReg());
> + } else {
> + assert(VA.isMemLoc());
> + unsigned LocMemOffset = VA.getLocMemOffset();
> + X86AddressMode AM;
> + AM.Base.Reg = RegInfo->getStackRegister();
> + AM.Disp = LocMemOffset;
> + ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
> + unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
> + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
> + MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
> + ArgVT.getStoreSize(), Alignment);
> + if (Flags.isByVal()) {
> + X86AddressMode SrcAM;
> + SrcAM.Base.Reg = ArgReg;
> + if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
> + return false;
> + } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
> + // If this is a really simple value, emit this with the Value* version
> + // of X86FastEmitStore. If it isn't simple, we don't want to do this,
> + // as it can cause us to reevaluate the argument.
> + if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
> + return false;
> + } else {
> + bool ValIsKill = hasTrivialKill(ArgVal);
> + if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
> + return false;
> + }
> + }
> + }
> +
> + // ELF / PIC requires the GOT pointer to be materialized in the EBX
> + // register before function calls made via the PLT.
> + if (Subtarget->isPICStyleGOT()) {
> + unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
> + TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
> + }
> +
> + if (Is64Bit && IsVarArg && !IsWin64) {
> + // From AMD64 ABI document:
> + // For calls that may call functions that use varargs or stdargs
> + // (prototype-less calls or calls to functions containing ellipsis (...) in
> + // the declaration) %al is used as hidden argument to specify the number
> + // of SSE registers used. The contents of %al do not need to match exactly
> + // the number of registers, but must be an upper bound on the number of SSE
> + // registers used and is in the range 0 - 8 inclusive.
> +
> + // Count the number of XMM registers allocated.
> + static const MCPhysReg XMMArgRegs[] = {
> + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
> + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
> + };
> + unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
> + assert((Subtarget->hasSSE1() || !NumXMMRegs)
> + && "SSE registers cannot be used when SSE is disabled");
> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
> + X86::AL).addImm(NumXMMRegs);
> + }
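(Concretely - a hypothetical printf call on x86-64 SysV, asm for illustration only:)

  //   printf("%f %f\n", x, y) passes x and y in %xmm0/%xmm1, so the
  //   code above emits, in effect:
  //     movb $2, %al        ; two SSE registers are live in this call
  //     callq printf
  //   Any upper bound in 0-8 would satisfy the ABI; FastISel uses the
  //   exact count of allocated XMM argument registers.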
> +
> + // Materialize callee address in a register. FIXME: GV address can be
> + // handled with a CALLpcrel32 instead.
> + X86AddressMode CalleeAM;
> + if (!X86SelectCallAddress(Callee, CalleeAM))
> + return false;
> +
> + unsigned CalleeOp = 0;
> + const GlobalValue *GV = nullptr;
> + if (CalleeAM.GV != nullptr) {
> + GV = CalleeAM.GV;
> + } else if (CalleeAM.Base.Reg != 0) {
> + CalleeOp = CalleeAM.Base.Reg;
> + } else
> + return false;
> +
> + // Issue the call.
> + MachineInstrBuilder MIB;
> + if (CalleeOp) {
> + // Register-indirect call.
> + unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
> + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
> + .addReg(CalleeOp);
> + } else {
> + // Direct call.
> + assert(GV && "Not a direct call");
> + unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
> +
> + // See if we need any target-specific flags on the GV operand.
> + unsigned char OpFlags = 0;
> +
> + // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
> + // external symbols must go through the PLT in PIC mode. If the symbol
> + // has hidden or protected visibility, or if it is static or local, then
> + // we don't need to use the PLT - we can directly call it.
> + if (Subtarget->isTargetELF() &&
> + TM.getRelocationModel() == Reloc::PIC_ &&
> + GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
> + OpFlags = X86II::MO_PLT;
> + } else if (Subtarget->isPICStyleStubAny() &&
> + (GV->isDeclaration() || GV->isWeakForLinker()) &&
> + (!Subtarget->getTargetTriple().isMacOSX() ||
> + Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
> + // PC-relative references to external symbols should go through $stub,
> + // unless we're building with the leopard linker or later, which
> + // automatically synthesizes these stubs.
> + OpFlags = X86II::MO_DARWIN_STUB;
> + }
> +
> + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
> + if (SymName)
> + MIB.addExternalSymbol(SymName, OpFlags);
> + else
> + MIB.addGlobalAddress(GV, 0, OpFlags);
> + }
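(The operand flag only changes the relocation on the call - illustrative asm, with 'foo' standing in for an external callee:)

  //   MO_PLT:          callq foo@PLT     ; ELF PIC, default visibility
  //   no flag:         callq foo         ; non-PIC, or hidden/local callee
  //   MO_DARWIN_STUB:  calll L_foo$stub  ; pre-Leopard Darwin stub thunk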
> +
> + // Add a register mask operand representing the call-preserved registers.
> + // Proper defs for return values will be added by setPhysRegsDeadExcept().
> + MIB.addRegMask(TRI.getCallPreservedMask(CC));
> +
> + // Add an implicit use GOT pointer in EBX.
> + if (Subtarget->isPICStyleGOT())
> + MIB.addReg(X86::EBX, RegState::Implicit);
> +
> + if (Is64Bit && IsVarArg && !IsWin64)
> + MIB.addReg(X86::AL, RegState::Implicit);
> +
> + // Add implicit physical register uses to the call.
> + for (auto Reg : OutRegs)
> + MIB.addReg(Reg, RegState::Implicit);
> +
> + // Issue CALLSEQ_END
> + unsigned NumBytesForCalleeToPop =
> + computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
> + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
> + .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
> +
> + // Now handle call return values.
> + SmallVector<CCValAssign, 16> RVLocs;
> + CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs,
> + CLI.RetTy->getContext());
> + CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
> +
> + // Copy all of the result registers out of their specified physreg.
> + unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
> + for (unsigned i = 0; i != RVLocs.size(); ++i) {
> + CCValAssign &VA = RVLocs[i];
> + EVT CopyVT = VA.getValVT();
> + unsigned CopyReg = ResultReg + i;
> +
> + // If this is x86-64 and we disabled SSE, we can't return FP values.
> + if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
> + ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
> + report_fatal_error("SSE register return with SSE disabled");
> + }
> +
> + // If this is a call to a function that returns an fp value on the floating
> + // point stack, we must guarantee the value is popped from the stack, so
> + // a COPY is not good enough - the copy instruction may be eliminated if the
> + // return value is not used. We use the FpPOP_RETVAL instruction instead.
> + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
> + // If we prefer to use the value in xmm registers, copy it out as f80 and
> + // use a truncate to move it from fp stack reg to xmm reg.
> + if (isScalarFPTypeInSSEReg(VA.getValVT())) {
> + CopyVT = MVT::f80;
> + CopyReg = createResultReg(&X86::RFP80RegClass);
> + }
> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
> + TII.get(X86::FpPOP_RETVAL), CopyReg);
> +
> + // Round the f80 to the right size, which also moves it to the appropriate
> + // xmm register. This is accomplished by storing the f80 value in memory
> + // and then loading it back.
> + if (CopyVT != VA.getValVT()) {
> + EVT ResVT = VA.getValVT();
> + unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
> + unsigned MemSize = ResVT.getSizeInBits()/8;
> + int FI = MFI.CreateStackObject(MemSize, MemSize, false);
> + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
> + TII.get(Opc)), FI)
> + .addReg(CopyReg);
> + Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
> + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
> + TII.get(Opc), ResultReg + i), FI);
> + }
> + } else {
> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
> + TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
> + InRegs.push_back(VA.getLocReg());
> + }
> + }
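(For an x86-32 callee returning a double in %st(0), the block above amounts to - illustrative asm:)

  //   FpPOP_RETVAL %fp80          ; pop %st(0); never DCE'd, so the x87
  //                               ; stack stays balanced even if unused
  //   fstpl  (stack slot)         ; ST_Fp80m64: spill, rounding f80 -> f64
  //   movsd  (stack slot), %xmm0  ; MOVSDrm: reload into an SSE register
  // The memory round trip is what performs the f80 -> f64 rounding.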
> +
> + CLI.ResultReg = ResultReg;
> + CLI.NumResultRegs = RVLocs.size();
> + CLI.Call = MIB;
> +
> + return true;
> +}
>
> bool
> X86FastISel::TargetSelectInstruction(const Instruction *I) {
>
>