[LLVMdev] Virtual register problem in X86 backend

Rinaldini Julien julien.rinaldini at heig-vd.ch
Wed Dec 10 01:13:16 PST 2014


Hi,

Thanks for your help.

Here is the IR code:

; ModuleID = 'foo_bar.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [6 x i8] c"MAIN\0A\00", align 1

; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x
i8]* @.str, i32 0, i32 0))
  call void @llvm.burnstack()
  ret i32 0
}

declare i32 @printf(i8*, ...) #1

declare void @llvm.va_start(i8*)

declare void @llvm.va_end(i8*)

; Function Attrs: nounwind
declare void @llvm.burnstack() #2

attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.ident = !{!0}

!0 = metadata !{metadata !"clang version 3.5.0 (tags/RELEASE_350/final) ()"}

Here is the assembly generated by llc (I'm not sure that's what you
meant by 'final assembly'):

        .text
        .file   "foo_bar.ll"
        .globl  main
        .align  16, 0x90
        .type   main,@function
main:                                   # @main
        .cfi_startproc
# BB#0:                                 # %entry
        pushq   %rbp
.Ltmp0:
        .cfi_def_cfa_offset 16
.Ltmp1:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
.Ltmp2:
        .cfi_def_cfa_register %rbp
        subq    $16, %rsp
        movl    $0, -4(%rbp)
        movl    %edi, -8(%rbp)
        movq    %rsi, -16(%rbp)
        movl    $.L.str, %edi
        xorl    %eax, %eax
        callq   printf
        callq   llvm.burnstack
        xorl    %eax, %eax
        addq    $16, %rsp
        popq    %rbp
        retq
.Ltmp3:
        .size   main, .Ltmp3-main
        .cfi_endproc

        .type   .L.str,@object          # @.str
        .section        .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .asciz  "MAIN\n"
        .size   .L.str, 6


        .ident  "clang version 3.5.0 (tags/RELEASE_350/final) ()"
        .section        ".note.GNU-stack","",@progbits

And here is the machine code produced by my custom inserter (implemented
in X86ISelLowering.cpp):

# Machine code for function main: SSA
Frame Objects:
  fi#0: size=4, align=4, at location [SP+8]
  fi#1: size=4, align=4, at location [SP+8]
  fi#2: size=8, align=8, at location [SP+8]
Function Live Ins: %EDI in %vreg0, %RSI in %vreg2

BB#0: derived from LLVM BB %entry
    Live Ins: %EDI %RSI
        %vreg2<def> = COPY %RSI; GR64:%vreg2
        %vreg0<def> = COPY %EDI; GR32:%vreg0
        %vreg1<def> = COPY %vreg0<kill>; GR32:%vreg1,%vreg0
        %vreg3<def> = COPY %vreg2<kill>; GR64:%vreg3,%vreg2
        %vreg5<def> = MOV64ri <ga:@.str>; GR64:%vreg5
        MOV32mi <fi#0>, 1, %noreg, 0, %noreg, 0; mem:ST4[%retval]
        MOV32mr <fi#1>, 1, %noreg, 0, %noreg, %vreg1;
mem:ST4[%argc.addr] GR32:%vreg1
        MOV64mr <fi#2>, 1, %noreg, 0, %noreg, %vreg3;
mem:ST8[%argv.addr] GR64:%vreg3
        ADJCALLSTACKDOWN64 0, %RSP<imp-def>, %EFLAGS<imp-def>, %RSP<imp-use>
        %RDI<def> = COPY %vreg5; GR64:%vreg5
        %AL<def> = MOV8ri 0
        CALL64pcrel32 <ga:@printf>, <regmask>, %RSP<imp-use>,
%AL<imp-use>, %RDI<imp-use>, %EAX<imp-def>
        ADJCALLSTACKUP64 0, 0, %RSP<imp-def>, %EFLAGS<imp-def>,
%RSP<imp-use>
        %vreg6<def> = COPY %EAX; GR32:%vreg6
        %vreg4<def> = MOV32ri 0; GR32:%vreg4
        MOV64rr %vreg7, %RSP; GR64:%vreg7
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %entry
    Predecessors according to CFG: BB#0 BB#2
        %vreg8<def> = PHI %vreg7, <BB#0>, %vreg9, <BB#2>;
GR64:%vreg8,%vreg7,%vreg9
        CMP64rr %vreg8, %RBP, %EFLAGS<imp-def>; GR64:%vreg8
        JE_4 <BB#3>, %EFLAGS<imp-use>
    Successors according to CFG: BB#2 BB#3

BB#2: derived from LLVM BB %entry
    Predecessors according to CFG: BB#1
        MOV32mi %vreg8, 1, %noreg, 0, %noreg, 0; GR64:%vreg8
        %vreg9<def,tied1> = ADD64ri32 %vreg8<tied0>, 8,
%EFLAGS<imp-def>; GR64:%vreg9,%vreg8
        JMP_4 <BB#1>
    Successors according to CFG: BB#1 BB#3

BB#3: derived from LLVM BB %entry
    Predecessors according to CFG: BB#1 BB#2
        %EAX<def> = COPY %vreg4; GR32:%vreg4
        RETQ %EAX<imp-use>

# End machine code for function main.

Cheers

On 12/09/2014 08:01 PM, Quentin Colombet wrote:
> Hi Julien,
> 
> I have to admit that the way you build the virtual registers looks correct to me.
> 
> Could you attach the machine IR right before and after the insertion as well as the final assembly (i.e., not just the binary), to see if I can help you further.
> 
> Thanks,
> -Quentin
> On Dec 8, 2014, at 5:56 AM, Rinaldini Julien <julien.rinaldini at heig-vd.ch> wrote:
> 
>> Hi,
>>
>> I'm having trouble using virtual registers in the X86 backend.
>>
>> I implemented a new intrinsic and I use a custom inserter. The goal of
>> the intrinsic is to set the content of the stack to zero at the end of
>> each function.
>>
>> Here is my code:
>>
>> MachineBasicBlock *
>> X86TargetLowering::EmitBURNSTACKWithCustomInserter(
>>                     MachineInstr *MI,
>>                     MachineBasicBlock *MBB) const {
>>    DebugLoc db = MI->getDebugLoc();
>>    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
>>    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
>>    MachineFunction *F = MBB->getParent();
>>
>>    // Create all the basicblocks
>>    MachineBasicBlock *MBB_cond = F->CreateMachineBasicBlock(LLVM_BB);
>>    MachineBasicBlock *MBB_erase = F->CreateMachineBasicBlock(LLVM_BB);
>>    MachineBasicBlock *MBB_end = F->CreateMachineBasicBlock(LLVM_BB);
>>
>>    // Insert the new basicblocks
>>    F->insert(MBB, MBB_cond);
>>    F->insert(MBB, MBB_erase);
>>    F->insert(MBB, MBB_end);
>>
>>    // Split the last MBB in two
>>    MBB_end->splice(MBB_end->begin(), MBB,
>> next(MachineBasicBlock::iterator(MI)), MBB->end());
>>    MBB_end->transferSuccessorsAndUpdatePHIs(MBB);
>>
>>    // Move MBB at the right place
>>    MBB_end->moveAfter(MBB);
>>    MBB_erase->moveAfter(MBB);
>>    MBB_cond->moveAfter(MBB);
>>
>>    // Set the new successors
>>    MBB->addSuccessor(MBB_cond);
>>    MBB_cond->addSuccessor(MBB_erase);
>>    MBB_cond->addSuccessor(MBB_end);
>>    MBB_erase->addSuccessor(MBB_cond);
>>    MBB_erase->addSuccessor(MBB_end);
>>
>>    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
>>    const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
>>    unsigned regA = MRI.createVirtualRegister(AddrRegClass);
>>    unsigned regB = MRI.createVirtualRegister(AddrRegClass);
>>    unsigned regC = MRI.createVirtualRegister(AddrRegClass);
>>
>>    // Set the indice
>>    BuildMI(*MBB, MI, db,
>> TII->get(X86::MOV64rr)).addReg(regA).addReg(X86::RSP);
>>
>>    // Check condition
>>    BuildMI(*MBB_cond, MBB_cond->end(), db, TII->get(X86::PHI),
>> regB).addReg(regA).addMBB(MBB).addReg(regC).addMBB(MBB_erase);
>>    BuildMI(*MBB_cond, MBB_cond->end(), db,
>> TII->get(X86::CMP64rr)).addReg(regB).addReg(X86::RBP);
>>    BuildMI(*MBB_cond, MBB_cond->end(), db,
>> TII->get(X86::JE_4)).addMBB(MBB_end);
>>
>>    // mov dword[reg], 0x0
>>    BuildMI(*MBB_erase, MBB_erase->end(), db,
>> TII->get(X86::MOV32mi)).addReg(regB).addImm(1).addReg(0).addImm(0).addReg(0).addImm(0);
>>   BuildMI(*MBB_erase, MBB_erase->end(), db, TII->get(X86::ADD64ri32),
>> regC).addReg(regB).addImm(8);
>>    BuildMI(*MBB_cond, MBB_erase->end(), db,
>> TII->get(X86::JMP_4)).addMBB(MBB_cond);
>>
>>    // Erase intrinsic
>>    MI->eraseFromParent();
>>    MBB->getParent()->dump();
>>    return MBB_erase;
>> }
>>
>> I run it on this sample code:
>>
>> #include <stdio.h>
>>
>> int main(int argc, char **argv) {
>>  printf("MAIN\n");
>>  return 0;
>> }
>>
>> And it generates this X86 assembly:
>>
>> / (fcn) sym.main 115
>> |          0x004004f0 b  55           push rbp
>> |          0x004004f1    4889e5       mov rbp, rsp
>> |          0x004004f4    4883ec30     sub rsp, 0x30
>> |          0x004004f8    48b8f405400. mov rax, str.MAIN ;  0x004005f4
>> |          0x00400502    c745fc00000. mov dword [rbp-0x4], 0x0
>> |          0x00400509    897df8       mov [rbp-0x8], edi
>> |          0x0040050c    488975f0     mov [rbp-0x10], rsi
>> |          0x00400510    4889c7       mov rdi, rax
>> |          0x00400513    b000         mov al, 0x0
>> |          0x00400515    e8a6feffff   call sym.imp.printf
>> |             sym.imp.printf(unk)
>> |          0x0040051a    b900000000   mov ecx, 0x0
>> |          0x0040051f    488b75e8     mov rsi, [rbp-0x18]
>> |          0x00400523    4889e6       mov rsi, rsp
>> |          0x00400526    8945e4       mov [rbp-0x1c], eax
>> |          0x00400529    894de0       mov [rbp-0x20], ecx
>> |          0x0040052c    48897dd8     mov [rbp-0x28], rdi
>> |     .    ; CODE (CALL) XREF from 0x00400555 (fcn.004004bc)
>> |- loc.00400530 51
>> |     .--> 0x00400530    488b45d8     mov rax, [rbp-0x28]
>> |     |    0x00400534    4839e8       cmp rax, rbp
>> |     |    0x00400537    488945d0     mov [rbp-0x30], rax
>> |     |,=< 0x0040053b    0f8419000000 je 0x40055a
>> |     ||   0x00400541    488b45d0     mov rax, [rbp-0x30]
>> |     ||   0x00400545    c70000000000 mov dword [rax], 0x0
>> |     ||   0x0040054b    480508000000 add rax, 0x8
>> |     ||   0x00400551    488945d8     mov [rbp-0x28], rax
>> |     |    ; CODE (CALL) XREF from 0x00400530 (fcn.004004bc)
>> |     `==< 0x00400555    e9d6ffffff   jmp loc.00400530
>> |      `-> 0x0040055a    8b45e0       mov eax, [rbp-0x20]
>> |          0x0040055d    4883c430     add rsp, 0x30
>> |          0x00400561    5d           pop rbp
>> \          0x00400562    c3           ret
>>
>>
>> As we can see, it moves RSP into RSI, but then generates the rest of the
>> code using RAX, so it fails.
>>
>> Am I missing something?
>>
>> Cheers
>> _______________________________________________
>> LLVM Developers mailing list
>> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
> 



More information about the llvm-dev mailing list