[LLVMdev] Virtual register problem in X86 backend

Quentin Colombet qcolombet at apple.com
Wed Dec 10 13:19:32 PST 2014


Hi Julien,

Thanks for the input, I think I know what is going on.

On Dec 10, 2014, at 1:13 AM, Rinaldini Julien <julien.rinaldini at heig-vd.ch> wrote:

> Hi,
> 
> Thx for your help...
> 
> Here is the IR code:
> 
> ; ModuleID = 'foo_bar.c'
> target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-unknown-linux-gnu"
> 
> @.str = private unnamed_addr constant [6 x i8] c"MAIN\0A\00", align 1
> 
> ; Function Attrs: nounwind uwtable
> define i32 @main(i32 %argc, i8** %argv) #0 {
> entry:
>  %retval = alloca i32, align 4
>  %argc.addr = alloca i32, align 4
>  %argv.addr = alloca i8**, align 8
>  store i32 0, i32* %retval
>  store i32 %argc, i32* %argc.addr, align 4
>  store i8** %argv, i8*** %argv.addr, align 8
>  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x
> i8]* @.str, i32 0, i32 0))
>  call void @llvm.burnstack()
>  ret i32 0
> }
> 
> declare i32 @printf(i8*, ...) #1
> 
> declare void @llvm.va_start(i8*)
> 
> declare void @llvm.va_end(i8*)
> 
> ; Function Attrs: nounwind
> declare void @llvm.burnstack() #2
> 
> attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
> "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
> "no-infs-fp-math"="false" "no-nans-fp-math"="false"
> "stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
> "use-soft-float"="false" }
> attributes #1 = { "less-precise-fpmad"="false"
> "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
> "no-infs-fp-math"="false" "no-nans-fp-math"="false"
> "stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
> "use-soft-float"="false" }
> attributes #2 = { nounwind }
> 
> !llvm.ident = !{!0}
> 
> !0 = metadata !{metadata !"clang version 3.5.0 (tags/RELEASE_350/final) ()"}
> 
> Here is the assembly generated by llc (I'm not sure that's what you
> meant by 'final assembly'):
> 
>        .text
>        .file   "foo_bar.ll"
>        .globl  main
>        .align  16, 0x90
>        .type   main,@function
> main:                                   # @main
>        .cfi_startproc
> # BB#0:                                 # %entry
>        pushq   %rbp
> .Ltmp0:
>        .cfi_def_cfa_offset 16
> .Ltmp1:
>        .cfi_offset %rbp, -16
>        movq    %rsp, %rbp
> .Ltmp2:
>        .cfi_def_cfa_register %rbp
>        subq    $16, %rsp
>        movl    $0, -4(%rbp)
>        movl    %edi, -8(%rbp)
>        movq    %rsi, -16(%rbp)
>        movl    $.L.str, %edi
>        xorl    %eax, %eax
>        callq   printf
>        callq   llvm.burnstack
>        xorl    %eax, %eax
>        addq    $16, %rsp
>        popq    %rbp
>        retq
> .Ltmp3:
>        .size   main, .Ltmp3-main
>        .cfi_endproc
> 
>        .type   .L.str,@object          # @.str
>        .section        .rodata.str1.1,"aMS",@progbits,1
> .L.str:
>        .asciz  "MAIN\n"
>        .size   .L.str, 6
> 
> 
>        .ident  "clang version 3.5.0 (tags/RELEASE_350/final) ()"
>        .section        ".note.GNU-stack","",@progbits
> 
> And here is the machine code generated by my custom inserter in
> X86ISelLowering.cpp:
> 
> # Machine code for function main: SSA
> Frame Objects:
>  fi#0: size=4, align=4, at location [SP+8]
>  fi#1: size=4, align=4, at location [SP+8]
>  fi#2: size=8, align=8, at location [SP+8]
> Function Live Ins: %EDI in %vreg0, %RSI in %vreg2
> 
> BB#0: derived from LLVM BB %entry
>    Live Ins: %EDI %RSI
>        %vreg2<def> = COPY %RSI; GR64:%vreg2
>        %vreg0<def> = COPY %EDI; GR32:%vreg0
>        %vreg1<def> = COPY %vreg0<kill>; GR32:%vreg1,%vreg0
>        %vreg3<def> = COPY %vreg2<kill>; GR64:%vreg3,%vreg2
>        %vreg5<def> = MOV64ri <ga:@.str>; GR64:%vreg5
>        MOV32mi <fi#0>, 1, %noreg, 0, %noreg, 0; mem:ST4[%retval]
>        MOV32mr <fi#1>, 1, %noreg, 0, %noreg, %vreg1;
> mem:ST4[%argc.addr] GR32:%vreg1
>        MOV64mr <fi#2>, 1, %noreg, 0, %noreg, %vreg3;
> mem:ST8[%argv.addr] GR64:%vreg3
>        ADJCALLSTACKDOWN64 0, %RSP<imp-def>, %EFLAGS<imp-def>, %RSP<imp-use>
>        %RDI<def> = COPY %vreg5; GR64:%vreg5
>        %AL<def> = MOV8ri 0
>        CALL64pcrel32 <ga:@printf>, <regmask>, %RSP<imp-use>,
> %AL<imp-use>, %RDI<imp-use>, %EAX<imp-def>
>        ADJCALLSTACKUP64 0, 0, %RSP<imp-def>, %EFLAGS<imp-def>,
> %RSP<imp-use>
>        %vreg6<def> = COPY %EAX; GR32:%vreg6
>        %vreg4<def> = MOV32ri 0; GR32:%vreg4
>        MOV64rr %vreg7, %RSP; GR64:%vreg7

Here, %vreg7 should be marked <def>. It is not, because of a typo in your code (see below).

>    Successors according to CFG: BB#1
> 
> BB#1: derived from LLVM BB %entry
>    Predecessors according to CFG: BB#0 BB#2
>        %vreg8<def> = PHI %vreg7, <BB#0>, %vreg9, <BB#2>;
> GR64:%vreg8,%vreg7,%vreg9
>        CMP64rr %vreg8, %RBP, %EFLAGS<imp-def>; GR64:%vreg8
>        JE_4 <BB#3>, %EFLAGS<imp-use>
>    Successors according to CFG: BB#2 BB#3
> 
> BB#2: derived from LLVM BB %entry
>    Predecessors according to CFG: BB#1
>        MOV32mi %vreg8, 1, %noreg, 0, %noreg, 0; GR64:%vreg8
>        %vreg9<def,tied1> = ADD64ri32 %vreg8<tied0>, 8,
> %EFLAGS<imp-def>; GR64:%vreg9,%vreg8
>        JMP_4 <BB#1>
>    Successors according to CFG: BB#1 BB#3
> 
> BB#3: derived from LLVM BB %entry
>    Predecessors according to CFG: BB#1 BB#2
>        %EAX<def> = COPY %vreg4; GR32:%vreg4
>        RETQ %EAX<imp-use>
> 
> # End machine code for function main.
> 
> Cheers
> 
> On 12/09/2014 08:01 PM, Quentin Colombet wrote:
>> Hi Julien,
>> 
>> I have to admit that the way you build the virtual registers looks correct to me.
>> 
>> Could you attach the machine IR right before and after the insertion, as well as the final assembly (i.e., not just the binary), so that I can see whether I can help you further?
>> 
>> Thanks,
>> -Quentin
>> On Dec 8, 2014, at 5:56 AM, Rinaldini Julien <julien.rinaldini at heig-vd.ch> wrote:
>> 
>>> Hi,
>>> 
>>> I'm having trouble using virtual registers in the X86 backend.
>>> 
>>> I implemented a new intrinsic and I use a custom inserter. The goal of
>>> the intrinsic is to set the content of the stack to zero at the end of
>>> each function.
>>> 
>>> Here is my code:
>>> 
>>> MachineBasicBlock *
>>> X86TargetLowering::EmitBURNSTACKWithCustomInserter(
>>>                    MachineInstr *MI,
>>>                    MachineBasicBlock *MBB) const {
>>>   DebugLoc db = MI->getDebugLoc();
>>>   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
>>>   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
>>>   MachineFunction *F = MBB->getParent();
>>> 
>>>   // Create all the basicblocks
>>>   MachineBasicBlock *MBB_cond = F->CreateMachineBasicBlock(LLVM_BB);
>>>   MachineBasicBlock *MBB_erase = F->CreateMachineBasicBlock(LLVM_BB);
>>>   MachineBasicBlock *MBB_end = F->CreateMachineBasicBlock(LLVM_BB);
>>> 
>>>   // Insert the new basicblocks
>>>   F->insert(MBB, MBB_cond);
>>>   F->insert(MBB, MBB_erase);
>>>   F->insert(MBB, MBB_end);
>>> 
>>>   // Split the last MBB in two
>>>   MBB_end->splice(MBB_end->begin(), MBB,
>>> next(MachineBasicBlock::iterator(MI)), MBB->end());
>>>   MBB_end->transferSuccessorsAndUpdatePHIs(MBB);
>>> 
>>>   // Move MBB at the right place
>>>   MBB_end->moveAfter(MBB);
>>>   MBB_erase->moveAfter(MBB);
>>>   MBB_cond->moveAfter(MBB);
>>> 
>>>   // Set the new successors
>>>   MBB->addSuccessor(MBB_cond);
>>>   MBB_cond->addSuccessor(MBB_erase);
>>>   MBB_cond->addSuccessor(MBB_end);
>>>   MBB_erase->addSuccessor(MBB_cond);
>>>   MBB_erase->addSuccessor(MBB_end);
>>> 
>>>   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
>>>   const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
>>>   unsigned regA = MRI.createVirtualRegister(AddrRegClass);
>>>   unsigned regB = MRI.createVirtualRegister(AddrRegClass);
>>>   unsigned regC = MRI.createVirtualRegister(AddrRegClass);
>>> 
>>>   // Set the indice
>>>   BuildMI(*MBB, MI, db,
>>> TII->get(X86::MOV64rr)).addReg(regA).addReg(X86::RSP);

Either pass regA as the last argument of BuildMI (as you did for the PHI, for instance), or add the Define flag when you add regA as an operand, i.e. addReg(regA, RegState::Define).

Hopefully, that should fix your issue :).

Cheers,
-Quentin

>>> 
>>>   // Check condition
>>>   BuildMI(*MBB_cond, MBB_cond->end(), db, TII->get(X86::PHI),
>>> regB).addReg(regA).addMBB(MBB).addReg(regC).addMBB(MBB_erase);
>>>   BuildMI(*MBB_cond, MBB_cond->end(), db,
>>> TII->get(X86::CMP64rr)).addReg(regB).addReg(X86::RBP);
>>>   BuildMI(*MBB_cond, MBB_cond->end(), db,
>>> TII->get(X86::JE_4)).addMBB(MBB_end);
>>> 
>>>   // mov dword[reg], 0x0
>>>   BuildMI(*MBB_erase, MBB_erase->end(), db,
>>> TII->get(X86::MOV32mi)).addReg(regB).addImm(1).addReg(0).addImm(0).addReg(0).addImm(0);
>>>  BuildMI(*MBB_erase, MBB_erase->end(), db, TII->get(X86::ADD64ri32),
>>> regC).addReg(regB).addImm(8);
>>>   BuildMI(*MBB_cond, MBB_erase->end(), db,
>>> TII->get(X86::JMP_4)).addMBB(MBB_cond);
>>> 
>>>   // Erase intrinsic
>>>   MI->eraseFromParent();
>>>   MBB->getParent()->dump();
>>>   return MBB_erase;
>>> }
>>> 
>>> I run it on this sample code:
>>> 
>>> #include <stdio.h>
>>> 
>>> int main(int argc, char **argv) {
>>> printf("MAIN\n");
>>> return 0;
>>> }
>>> 
>>> And it generates this X86 assembly:
>>> 
>>> / (fcn) sym.main 115
>>> |          0x004004f0 b  55           push rbp
>>> |          0x004004f1    4889e5       mov rbp, rsp
>>> |          0x004004f4    4883ec30     sub rsp, 0x30
>>> |          0x004004f8    48b8f405400. mov rax, str.MAIN ;  0x004005f4
>>> |          0x00400502    c745fc00000. mov dword [rbp-0x4], 0x0
>>> |          0x00400509    897df8       mov [rbp-0x8], edi
>>> |          0x0040050c    488975f0     mov [rbp-0x10], rsi
>>> |          0x00400510    4889c7       mov rdi, rax
>>> |          0x00400513    b000         mov al, 0x0
>>> |          0x00400515    e8a6feffff   call sym.imp.printf
>>> |             sym.imp.printf(unk)
>>> |          0x0040051a    b900000000   mov ecx, 0x0
>>> |          0x0040051f    488b75e8     mov rsi, [rbp-0x18]
>>> |          0x00400523    4889e6       mov rsi, rsp
>>> |          0x00400526    8945e4       mov [rbp-0x1c], eax
>>> |          0x00400529    894de0       mov [rbp-0x20], ecx
>>> |          0x0040052c    48897dd8     mov [rbp-0x28], rdi
>>> |     .    ; CODE (CALL) XREF from 0x00400555 (fcn.004004bc)
>>> |- loc.00400530 51
>>> |     .--> 0x00400530    488b45d8     mov rax, [rbp-0x28]
>>> |     |    0x00400534    4839e8       cmp rax, rbp
>>> |     |    0x00400537    488945d0     mov [rbp-0x30], rax
>>> |     |,=< 0x0040053b    0f8419000000 je 0x40055a
>>> |     ||   0x00400541    488b45d0     mov rax, [rbp-0x30]
>>> |     ||   0x00400545    c70000000000 mov dword [rax], 0x0
>>> |     ||   0x0040054b    480508000000 add rax, 0x8
>>> |     ||   0x00400551    488945d8     mov [rbp-0x28], rax
>>> |     |    ; CODE (CALL) XREF from 0x00400530 (fcn.004004bc)
>>> |     `==< 0x00400555    e9d6ffffff   jmp loc.00400530
>>> |      `-> 0x0040055a    8b45e0       mov eax, [rbp-0x20]
>>> |          0x0040055d    4883c430     add rsp, 0x30
>>> |          0x00400561    5d           pop rbp
>>> \          0x00400562    c3           ret
>>> 
>>> 
>>> As we can see, it moves RSP into RSI, but then generates the rest of the
>>> code using RAX, so it fails.
>>> 
>>> Am I missing something?
>>> 
>>> Cheers
>>> _______________________________________________
>>> LLVM Developers mailing list
>>> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
>> 
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev





More information about the llvm-dev mailing list