[llvm-commits] [llvm] r56290 - in /llvm/trunk: include/llvm/CodeGen/LiveIntervalAnalysis.h include/llvm/CodeGen/MachineOperand.h include/llvm/CodeGen/ScheduleDAG.h lib/CodeGen/AsmPrinter/AsmPrinter.cpp lib/CodeGen/LiveIntervalAnalysis.cpp lib/CodeGen/MachineInstr.cpp lib/CodeGen/RegAllocLinearScan.cpp lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp test/CodeGen/X86/2008-09-17-inline-asm-1.ll

Dale Johannesen dalej at apple.com
Thu Sep 18 10:56:50 PDT 2008


On Sep 18, 2008, at 10:17 AM PDT, Evan Cheng wrote:

> Hi Dale,
>
> Thanks for doing this. Comments inline.
>
> On Sep 17, 2008, at 2:13 PM, Dale Johannesen wrote:
>
> LiveIntervals::releaseMemory() should release
> AsmsWithEarlyClobberConflict and AsmsThatEarlyClobber, no?

Right.

>
>>
>> +/// noEarlyclobberConflict - see whether virtual reg VReg has a
>> conflict with
>> +/// hard reg HReg because of earlyclobbers.
>> +///
>> +/// Earlyclobber operands may not be assigned the same register as
>> +/// each other, or as earlyclobber-conflict operands (i.e. those  
>> that
>> +/// are non-earlyclobbered inputs to an asm that also has
>> earlyclobbers).
>> +///
>> +/// Thus there are two cases to check for:
>> +/// 1.  VReg is an earlyclobber-conflict register and HReg is an
>> earlyclobber
>> +/// register in some asm that also has VReg as an input.
>> +/// 2.  VReg is an earlyclobber register and HReg is an
>> earlyclobber-conflict
>> +/// input elsewhere in some asm.
>> +/// In both cases HReg can be assigned by the user, or assigned
>> early in
>> +/// register allocation.
>> +///
>> +/// Dropping the distinction between earlyclobber and earlyclobber-
>> conflict,
>> +/// keeping only one multimap, looks promising, but two
>> earlyclobber-conflict
>> +/// operands may be assigned the same register if they happen to
>> contain the
>> +/// same value, and that implementation would prevent this.
>> +///
>> +bool LiveIntervals::noEarlyclobberConflict(unsigned VReg,
>> VirtRegMap &vrm,
>> +                                           unsigned HReg) {
>> +  typedef std::multimap<unsigned, MachineInstr*>::iterator It;
>> +
>> +  // Short circuit the most common case.
>> +  if (AsmsWithEarlyClobberConflict.size()!=0) {
>
> If AsmsWithEarlyClobberConflict is empty, there is no need to check
> for AsmsThatEarlyClobber, right? And vice versa.

Right, although very minor.

> Also, I don't think this is a safe approach, machine instructions can
> be deleted and leave the maps in the invalid state.

It's safe if deletions update the maps, but I haven't touched the  
coalescer yet.
Agreed, this is bad, guess I shouldn't have checked it in yet.

>> +    std::pair<It, It> x =
>> AsmsWithEarlyClobberConflict.equal_range(VReg);
>> +    for (It I = x.first; I!=x.second; I++) {
>> +      MachineInstr* MI = I->second;
>> +      for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
>> +        MachineOperand &MO = MI->getOperand(i);
>> +        if (MO.isRegister() && MO.isEarlyClobber()) {
>> +          unsigned PhysReg = MO.getReg();
>> +          if (PhysReg &&
>> TargetRegisterInfo::isVirtualRegister(PhysReg)) {
>> +            if (!vrm.hasPhys(PhysReg))
>> +              continue;
>> +            PhysReg = vrm.getPhys(PhysReg);
>> +          }
>> +          if (PhysReg==HReg)
>> +            return false;
>> +        }
>> +      }
>> +    }
>> +  }
>> +  // Short circuit the most common case.
>> +  if (AsmsThatEarlyClobber.size()!=0) {
>> +    std::pair<It, It> x = AsmsThatEarlyClobber.equal_range(VReg);
>> +    for (It I = x.first; I!=x.second; I++) {
>> +      MachineInstr* MI = I->second;
>> +      for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
>> +        MachineOperand &MO = MI->getOperand(i);
>> +        if (MO.isRegister() && MO.overlapsEarlyClobber()) {
>> +          unsigned PhysReg = MO.getReg();
>> +          if (PhysReg &&
>> TargetRegisterInfo::isVirtualRegister(PhysReg)) {
>> +            if (!vrm.hasPhys(PhysReg))
>> +              continue;
>> +            PhysReg = vrm.getPhys(PhysReg);
>> +          }
>> +          if (PhysReg==HReg)
>> +            return false;
>> +        }
>> +      }
>> +    }
>> +  }
>> +  return true;
>> +}
>>
>> LiveInterval* LiveIntervals::createInterval(unsigned reg) {
>>  float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ?
>>
>>
>> Modified: llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp?rev=56290&r1=56289&r2=56290&view=diff
>>
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> = 
>> =====================================================================
>> --- llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp Wed Sep 17
>> 16:13:11 2008
>> @@ -1050,7 +1050,8 @@
>>  TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
>>  assert(I != E && "No allocatable register in this register class!");
>>  for (; I != E; ++I)
>> -    if (prt_->isRegAvail(*I)) {
>> +    if (prt_->isRegAvail(*I) &&
>> +        li_->noEarlyclobberConflict(cur->reg, *vrm_, *I)) {
>
> Ok, I have two concerns. 1. We need to do this check for every
> interval.

Yes, but in the typical case where there are no asms it returns  
immediately.
I could add a bit to the LiveInterval easily enough (had this at one  
point,
but eventually decided I didn't need it).  Of course you still have to  
check
the bit for each interval, but there's no escape from that I don't  
think.

> 2. After AsmsThatEarlyClobber and
> AsmsWithEarlyClobberConflict are populated virtual registers can
> change due to coalescing (and potentially other passes). Then the maps
> won't be accurate.
>
> Perhaps we should add a bit to LiveInterval to indicate a vr can be
> earlyclobber or earlyclobberconflict. Coalescer should not touch any
> register whose interval has the bit. Then we only do the check for
> those intervals here. The check can use MachineRegisterInfo def / use
> chain to look for any conflicts.

I preferred a lookup table because walking a chain can get slow, but  
OK, I can
do it this way.  It may be that maintaining the table correctly is too  
hard.

> Evan
>
>>
>>      FreeReg = *I;
>>      if (FreeReg < inactiveCounts.size())
>>        FreeRegInactiveCount = inactiveCounts[FreeReg];
>> @@ -1070,7 +1071,8 @@
>>  for (; I != E; ++I) {
>>    unsigned Reg = *I;
>>    if (prt_->isRegAvail(Reg) && Reg < inactiveCounts.size() &&
>> -        FreeRegInactiveCount < inactiveCounts[Reg]) {
>> +        FreeRegInactiveCount < inactiveCounts[Reg] &&
>> +        li_->noEarlyclobberConflict(cur->reg, *vrm_, Reg)) {
>>      FreeReg = Reg;
>>      FreeRegInactiveCount = inactiveCounts[Reg];
>>      if (FreeRegInactiveCount == MaxInactiveCount)
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp?rev=56290&r1=56289&r2=56290&view=diff
>>
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> = 
>> =====================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp  
>> (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGEmit.cpp Wed Sep
>> 17 16:13:11 2008
>> @@ -231,7 +231,8 @@
>> void ScheduleDAG::AddOperand(MachineInstr *MI, SDValue Op,
>>                             unsigned IIOpNum,
>>                             const TargetInstrDesc *II,
>> -                             DenseMap<SDValue, unsigned>
>> &VRBaseMap) {
>> +                             DenseMap<SDValue, unsigned> &VRBaseMap,
>> +                             bool overlapsEarlyClobber) {
>>  if (Op.isMachineOpcode()) {
>>    // Note that this case is redundant with the final else block,
>> but we
>>    // include it because it is the most common and it makes the logic
>> @@ -244,7 +245,9 @@
>>    const TargetInstrDesc &TID = MI->getDesc();
>>    bool isOptDef = IIOpNum < TID.getNumOperands() &&
>>      TID.OpInfo[IIOpNum].isOptionalDef();
>> -    MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
>> +    MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, false,
>> false,
>> +                                             false, 0, false,
>> +                                             overlapsEarlyClobber));
>>
>>    // Verify that it is right.
>>    assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a
>> vreg?");
>> @@ -278,7 +281,9 @@
>>    const ConstantFP *CFP = F->getConstantFPValue();
>>    MI->addOperand(MachineOperand::CreateFPImm(CFP));
>>  } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
>> -    MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
>> +    MI->addOperand(MachineOperand::CreateReg(R->getReg(), false,
>> false,
>> +                                             false, false, 0, false,
>> +                                             overlapsEarlyClobber));
>>  } else if (GlobalAddressSDNode *TGA =
>> dyn_cast<GlobalAddressSDNode>(Op)) {
>>    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA-
>>> getOffset()));
>>  } else if (BasicBlockSDNode *BB = dyn_cast<BasicBlockSDNode>(Op)) {
>> @@ -314,7 +319,9 @@
>>           Op.getValueType() != MVT::Flag &&
>>           "Chain and flag operands should occur at end of operand
>> list!");
>>    unsigned VReg = getVR(Op, VRBaseMap);
>> -    MI->addOperand(MachineOperand::CreateReg(VReg, false));
>> +    MI->addOperand(MachineOperand::CreateReg(VReg, false, false,
>> +                                             false, false, 0, false,
>> +                                             overlapsEarlyClobber));
>>
>>    // Verify that it is right.  Note that the reg class of the
>> physreg and the
>>    // vreg don't necessarily need to match, but the target copy
>> insertion has
>> @@ -596,6 +603,7 @@
>>
>>    // Add all of the operand registers to the instruction.
>>    for (unsigned i = 2; i != NumOps;) {
>> +      bool overlapsEarlyClobber = false;
>>      unsigned Flags =
>>        cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
>>      unsigned NumVals = Flags >> 3;
>> @@ -618,13 +626,18 @@
>>                                                   false, 0, true));
>>        }
>>        break;
>> +      case 7:  // Addressing mode overlapping earlyclobber.
>> +      case 5:  // Use of register overlapping earlyclobber.
>> +        overlapsEarlyClobber = true;
>> +        // fall through
>>      case 1:  // Use of register.
>>      case 3:  // Immediate.
>>      case 4:  // Addressing mode.
>>        // The addressing mode has been selected, just add all of the
>>        // operands to the machine instruction.
>>        for (; NumVals; --NumVals, ++i)
>> -          AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
>> +          AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
>> +                     overlapsEarlyClobber);
>>        break;
>>      }
>>    }
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp?rev=56290&r1=56289&r2=56290&view=diff
>>
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> = 
>> =====================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
>> (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp Wed
>> Sep 17 16:13:11 2008
>> @@ -4909,8 +4909,10 @@
>>        assert(OpInfo.isIndirect && "Memory output must be indirect
>> operand");
>>
>>        // Add information to the INLINEASM node to know about this
>> output.
>> -        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
>> -        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
>> +        unsigned ResOpType = SawEarlyClobber ?
>> +                                  7 /* MEM OVERLAPS EARLYCLOBBER  
>> */ :
>> +                                  4/*MEM*/;
>> +        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType |
>> (1<<3),
>>
>> TLI.getPointerTy()));
>>        AsmNodeOperands.push_back(OpInfo.CallOperand);
>>        break;
>> @@ -4963,7 +4965,8 @@
>>            cast<ConstantSDNode>(AsmNodeOperands[CurOp])-
>>> getZExtValue();
>>          assert(((NumOps & 7) == 2 /*REGDEF*/ ||
>>                  (NumOps & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
>> -                  (NumOps & 7) == 4 /*MEM*/) &&
>> +                  (NumOps & 7) == 4 /*MEM*/ ||
>> +                  (NumOps & 7) == 7 /*MEM OVERLAPS EARLYCLOBBER*/)  
>> &&
>>                 "Skipped past definitions?");
>>          CurOp += (NumOps>>3)+1;
>>        }
>> @@ -4985,14 +4988,17 @@
>>
>>          // Use the produced MatchedRegs object to
>>          MatchedRegs.getCopyToRegs(InOperandVal, DAG, Chain, &Flag);
>> -          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, DAG,
>> AsmNodeOperands);
>> +          MatchedRegs.AddInlineAsmOperands(SawEarlyClobber ?
>> +                                           1 /*REGUSE*/ :
>> +                                           5 /*REGUSE OVERLAPS
>> EARLYCLOBBER*/,
>> +                                           DAG, AsmNodeOperands);
>>          break;
>>        } else {
>> -          assert((NumOps & 7) == 4/*MEM*/ && "Unknown matching
>> constraint!");
>> +          assert(((NumOps & 7) == 7/*MEM OVERLAPS EARLYCLOBBER */ ||
>> +                  (NumOps & 7) == 4) && "Unknown matching
>> constraint!");
>>          assert((NumOps >> 3) == 1 && "Unexpected number of
>> operands");
>>          // Add information to the INLINEASM node to know about
>> this input.
>> -          unsigned ResOpType = 4/*MEM*/ | (1 << 3);
>> -          AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
>> +          AsmNodeOperands.push_back(DAG.getTargetConstant(NumOps,
>>
>> TLI.getPointerTy()));
>>          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
>>          break;
>> @@ -5024,8 +5030,10 @@
>>               "Memory operands expect pointer values");
>>
>>        // Add information to the INLINEASM node to know about this
>> input.
>> -        unsigned ResOpType = 4/*MEM*/ | (1 << 3);
>> -        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
>> +        unsigned ResOpType = SawEarlyClobber ?
>> +                                7 /* MEM OVERLAPS EARLYCLOBBER */ :
>> +                                4/*MEM*/;
>> +        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType |
>> (1<<3),
>>
>> TLI.getPointerTy()));
>>        AsmNodeOperands.push_back(InOperandVal);
>>        break;
>> @@ -5043,16 +5051,18 @@
>>
>>      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, Chain,
>> &Flag);
>>
>> -      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, DAG,
>> -                                               AsmNodeOperands);
>> +      OpInfo.AssignedRegs.AddInlineAsmOperands(SawEarlyClobber ?
>> +                                           5 /*REGUSE OVERLAPS
>> EARLYCLOBBER*/:
>> +                                           1/*REGUSE*/,
>> +                                           DAG, AsmNodeOperands);
>>      break;
>>    }
>>    case InlineAsm::isClobber: {
>>      // Add the clobbered value to the operand list, so that the
>> register
>>      // allocator is aware that the physreg got clobbered.
>>      if (!OpInfo.AssignedRegs.Regs.empty())
>> -        OpInfo.AssignedRegs.AddInlineAsmOperands(2/*REGDEF*/, DAG,
>> -                                                 AsmNodeOperands);
>> +        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER
>> REGDEF */,
>> +                                                 DAG,
>> AsmNodeOperands);
>>      break;
>>    }
>>    }
>>
>> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=56290&r1=56289&r2=56290&view=diff
>>
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> = 
>> =====================================================================
>> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
>> (original)
>> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Wed Sep
>> 17 16:13:11 2008
>> @@ -1113,7 +1113,8 @@
>>
>>  while (i != e) {
>>    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
>> -    if ((Flags & 7) != 4 /*MEM*/) {
>> +    if ((Flags & 7) != 4 /*MEM*/ &&
>> +        (Flags & 7) != 7 /*MEM OVERLAPS EARLYCLOBBER*/) {
>>      // Just skip over this operand, copying the operands verbatim.
>>      Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+(Flags
>>>> 3) + 1);
>>      i += (Flags >> 3) + 1;
>> @@ -1128,7 +1129,7 @@
>>
>>      // Add this to the output node.
>>      MVT IntPtrTy = CurDAG->getTargetLoweringInfo().getPointerTy();
>> -      Ops.push_back(CurDAG->getTargetConstant(4/*MEM*/ |
>> (SelOps.size() << 3),
>> +      Ops.push_back(CurDAG->getTargetConstant((Flags & 7) |
>> (SelOps.size()<< 3),
>>                                              IntPtrTy));
>>      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
>>      i += 2;
>>
>> Added: llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll?rev=56290&view=auto
>>
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> =
>> = 
>> =====================================================================
>> --- llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll (added)
>> +++ llvm/trunk/test/CodeGen/X86/2008-09-17-inline-asm-1.ll Wed Sep
>> 17 16:13:11 2008
>> @@ -0,0 +1,24 @@
>> +; RUN: llvm-as < %s | llc -o - -march=x86 | not grep "movl %eax,
>> %eax"
>> +; RUN: llvm-as < %s | llc -o - -march=x86 | not grep "movl %edx,
>> %edx"
>> +; RUN: llvm-as < %s | llc -o - -march=x86 | not grep "movl (%eax),
>> %eax"
>> +; RUN: llvm-as < %s | llc -o - -march=x86 | not grep "movl (%edx),
>> %edx"
>> +
>> +; %0 must not be put in EAX or EDX.
>> +; In the first asm, $0 and $2 must not be put in EAX.
>> +; In the second asm, $0 and $2 must not be put in EDX.
>> +; This is kind of hard to test thoroughly, but the things above
>> should continue
>> +; to pass, I think.
>> +; ModuleID = '<stdin>'
>> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-
>> i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-
>> f80:128:128"
>> +target triple = "i386-apple-darwin8"
>> + at x = common global i32 0		; <i32*> [#uses=1]
>> +
>> +define i32 @aci(i32* %pw) nounwind {
>> +entry:
>> +	%0 = load i32* @x, align 4		; <i32> [#uses=1]
>> +	%asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A
>> \09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A
>> \09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:",
>> "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32*
>> %pw, i32* %pw) nounwind		; <{ i32, i32 }> [#uses=0]
>> +	%asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A
>> \09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A
>> \09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:",
>> "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32*
>> %pw, i32* %pw) nounwind		; <{ i32, i32 }> [#uses=1]
>> +	%asmresult3 = extractvalue { i32, i32 } %asmtmp2, 0		; <i32>
>> [#uses=1]
>> +	%1 = add i32 %asmresult3, %0		; <i32> [#uses=1]
>> +	ret i32 %1
>> +}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list