<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Thu, Apr 26, 2018 at 3:44 AM, Anton Korobeynikov <span dir="ltr">&lt;<a href="mailto:anton@korobeynikov.info" target="_blank">anton@korobeynikov.info</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Most probably you need to properly specify the calling convention the<br>

backend is using for calling the runtime functions.</blockquote><div><br></div><div>Thanks for the tip. Can you be more specific? Are you suggesting there is some config parameter I can set before running TargetMachineEmitToFile?<br></div><div><br></div><div>Do you know what calling convention it is trying to use at the callsite? Perhaps I can simply select a different convention from this list for the implementation of udivti3? <a href="http://llvm.org/docs/LangRef.html#calling-conventions">http://llvm.org/docs/LangRef.html#calling-conventions</a><br></div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"> Or implement the<br>

stub for udivti3 that performs the necessary argument lifting.<br>

<br>

I guess there is no standard ABI document describing the intended<br>

calling convention here, so I'd just do what mingw64 does here and<br>

make everything here compatible. <br></blockquote><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">

<div><div class="gmail-h5"><br>

On Thu, Apr 26, 2018 at 4:44 AM, Andrew Kelley via llvm-dev<br>

&lt;<a href="mailto:llvm-dev@lists.llvm.org">llvm-dev@lists.llvm.org</a>&gt; wrote:<br>

> I'm trying to use LLVM to create compiler-rt.o on Windows. I use this<br>

> command from the compiler-rt project:<br>

><br>

> [nix-shell:~/downloads/llvm-<wbr>project/compiler-rt]$ clang -nostdlib  -S<br>

> -emit-llvm lib/builtins/udivti3.c  -g -target x86_64-windows<br>

> -DCRT_HAS_128BIT<br>

><br>

> The resulting LLVM IR is:<br>

> ==============================<wbr>==============================<wbr>=====<br>

><br>

> ; ModuleID = 'lib/builtins/udivti3.c'<br>

> source_filename = "lib/builtins/udivti3.c"<br>

> target datalayout = "e-m:w-i64:64-f80:128-n8:16:<wbr>32:64-S128"<br>

> target triple = "x86_64--windows-msvc19.11.0"<br>

><br>

> ; Function Attrs: noinline nounwind optnone uwtable<br>

> define i128 @__udivti3(i128, i128) #0 {<br>

>   %3 = alloca i128, align 16<br>

>   %4 = alloca i128, align 16<br>

>   store i128 %1, i128* %3, align 16<br>

>   store i128 %0, i128* %4, align 16<br>

>   %5 = load i128, i128* %3, align 16<br>

>   %6 = load i128, i128* %4, align 16<br>

>   %7 = call i128 @__udivmodti4(i128 %6, i128 %5, i128* null)<br>

>   ret i128 %7<br>

> }<br>

><br>

> declare i128 @__udivmodti4(i128, i128, i128*) #1<br>

><br>

> attributes #0 = { noinline nounwind optnone uwtable<br>

> "correctly-rounded-divide-<wbr>sqrt-fp-math"="false" "disable-tail-calls"="false"<br>

> "less-precise-fpmad"="false" "no-frame-pointer-elim"="<wbr>false"<br>

> "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false"<br>

> "no-signed-zeros-fp-math"="<wbr>false" "no-trapping-math"="false"<br>

> "stack-protector-buffer-size"=<wbr>"8" "target-cpu"="x86-64"<br>

> "target-features"="+fxsr,+mmx,<wbr>+sse,+sse2,+x87" "unsafe-fp-math"="false"<br>

> "use-soft-float"="false" }<br>

> attributes #1 = { "correctly-rounded-divide-<wbr>sqrt-fp-math"="false"<br>

> "disable-tail-calls"="false" "less-precise-fpmad"="false"<br>

> "no-frame-pointer-elim"="<wbr>false" "no-infs-fp-math"="false"<br>

> "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="<wbr>false"<br>

> "no-trapping-math"="false" "stack-protector-buffer-size"=<wbr>"8"<br>

> "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,<wbr>+sse,+sse2,+x87"<br>

> "unsafe-fp-math"="false" "use-soft-float"="false" }<br>

><br>

> !llvm.module.flags = !{!0, !1}<br>

> !llvm.ident = !{!2}<br>

><br>

> !0 = !{i32 1, !"wchar_size", i32 2}<br>

> !1 = !{i32 7, !"PIC Level", i32 2}<br>

> !2 = !{!"clang version 6.0.0 (tags/RELEASE_600/final)"}<br>

><br>

><br>

> ==============================<wbr>==============================<wbr>=====<br>

> However I think this results in a different ABI than LLVM will use when you<br>

> do i128 division. For example, here is my test case (in zig code):<br>

> ==============================<wbr>==============================<wbr>=====<br>

><br>

> pub extern "kernel32" stdcallcc fn ExitProcess(exit_code: c_uint) noreturn;<br>

><br>

> export fn WinMainCRTStartup() noreturn {<br>

>     @setAlignStack(16);<br>

>     @setRuntimeSafety(false);<br>

><br>

>     var a: u128 = 152313999999999991610955792383<wbr>;<br>

>     var b: u128 = 10000000000000000000;<br>

>     var c = a / b; // this generates a call to __udivti3<br>

><br>

>     if (c != b) {<br>

>         @breakpoint();<br>

>     }<br>

>     ExitProcess(0);<br>

> }<br>

><br>

> export fn __udivti3(a: u128, b: u128) u128 {<br>

>     @setRuntimeSafety(false);<br>

>     return b;<br>

> }<br>

><br>

><br>

> ==============================<wbr>==============================<wbr>=====<br>

> This results in this LLVM IR:<br>

> ==============================<wbr>==============================<wbr>=====<br>

><br>

> ; ModuleID = 'test'<br>

> source_filename = "test"<br>

> target datalayout = "e-m:w-i64:64-f80:128-n8:16:<wbr>32:64-S128"<br>

> target triple = "x86_64-pc-windows-msvc"<br>

><br>

> %"[]u8" = type { i8*, i64 }<br>

> %StackTrace = type { i64, %"[]usize" }<br>

> %"[]usize" = type { i64*, i64 }<br>

><br>

> ; Function Attrs: nounwind readnone speculatable<br>

> declare void @llvm.dbg.declare(metadata, metadata, metadata) #1<br>

><br>

> ; Function Attrs: nobuiltin noinline noreturn nounwind uwtable<br>

> alignstack(16)<br>

> define void @WinMainCRTStartup() #2 !dbg !41 {<br>

> Entry:<br>

>   %a = alloca i128, align 8<br>

>   %b = alloca i128, align 8<br>

>   %c = alloca i128, align 8<br>

>   store i128 152313999999999991610955792383<wbr>, i128* %a, align 8, !dbg !52<br>

>   call void @llvm.dbg.declare(metadata i128* %a, metadata !45, metadata<br>

> !DIExpression()), !dbg !52<br>

>   store i128 10000000000000000000, i128* %b, align 8, !dbg !53<br>

>   call void @llvm.dbg.declare(metadata i128* %b, metadata !48, metadata<br>

> !DIExpression()), !dbg !53<br>

>   %0 = load i128, i128* %a, align 8, !dbg !54<br>

>   %1 = load i128, i128* %b, align 8, !dbg !55<br>

>   %2 = udiv i128 %0, %1, !dbg !56<br>

>   store i128 %2, i128* %c, align 8, !dbg !57<br>

>   call void @llvm.dbg.declare(metadata i128* %c, metadata !50, metadata<br>

> !DIExpression()), !dbg !57<br>

>   %3 = load i128, i128* %c, align 8, !dbg !58<br>

>   %4 = load i128, i128* %b, align 8, !dbg !60<br>

>   %5 = icmp ne i128 %3, %4, !dbg !61<br>

>   br i1 %5, label %Then, label %Else, !dbg !61<br>

><br>

> Then:                                             ; preds = %Entry<br>

>   call void @llvm.debugtrap(), !dbg !62<br>

>   br label %EndIf, !dbg !64<br>

><br>

> Else:                                             ; preds = %Entry<br>

>   br label %EndIf, !dbg !64<br>

><br>

> EndIf:                                            ; preds = %Else, %Then<br>

>   call void @ExitProcess(i32 0), !dbg !65<br>

>   unreachable, !dbg !65<br>

> }<br>

><br>

> ; Function Attrs: nounwind<br>

> declare void @llvm.debugtrap() #3<br>

><br>

> ; Function Attrs: nobuiltin noreturn nounwind uwtable<br>

> declare void @ExitProcess(i32) #0<br>

><br>

> ; Function Attrs: nobuiltin nounwind uwtable<br>

> define i128 @__udivti3(i128, i128) #4 !dbg !66 {<br>

> Entry:<br>

>   %a = alloca i128, align 8<br>

>   %b = alloca i128, align 8<br>

>   store i128 %0, i128* %a, align 8<br>

>   call void @llvm.dbg.declare(metadata i128* %a, metadata !70, metadata<br>

> !DIExpression()), !dbg !73<br>

>   store i128 %1, i128* %b, align 8<br>

>   call void @llvm.dbg.declare(metadata i128* %b, metadata !71, metadata<br>

> !DIExpression()), !dbg !74<br>

>   %2 = load i128, i128* %b, align 8, !dbg !75<br>

>   ret i128 %2, !dbg !78<br>

> }<br>

><br>

> ; Function Attrs: nounwind<br>

> declare void @llvm.stackprotector(i8*, i8**) #3<br>

><br>

> attributes #0 = { nobuiltin noreturn nounwind uwtable<br>

> "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-<wbr>leaf" }<br>

> attributes #1 = { nounwind readnone speculatable }<br>

> attributes #2 = { nobuiltin noinline noreturn nounwind uwtable alignstack=16<br>

> "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-<wbr>leaf" }<br>

> attributes #3 = { nounwind }<br>

> attributes #4 = { nobuiltin nounwind uwtable "no-frame-pointer-elim"="true"<br>

> "no-frame-pointer-elim-non-<wbr>leaf" }<br>

><br>

> !llvm.module.flags = !{!0}<br>

> !<a href="http://llvm.dbg.cu" rel="noreferrer" target="_blank">llvm.dbg.cu</a> = !{!1}<br>

><br>

> ==============================<wbr>==============================<wbr>=====<br>

><br>

> When I link this with (link.exe or LLD, it does not matter):<br>

> link.exe /OUT:test.exe /ENTRY:WinMainCRTStartup test.obj /subsystem:console<br>

> kernel32.lib /nologo<br>

><br>

> And run it, it triggers the breakpoint.<br>

><br>

> Meanwhile on linux, this test passes.<br>

><br>

> I suspect it may be a calling convention issue. Here is the assembly for the<br>

> linux x86_64 version:<br>

><br>

><br>

> ==============================<wbr>==============================<wbr>=====<br>

> 0000000000000010 <_start>:<br>

>   10:    55                       push   %rbp<br>

>   11:    48 89 e5                 mov    %rsp,%rbp<br>

>   14:    48 83 ec 40              sub    $0x40,%rsp<br>

>   18:    48 b8 14 30 27 ec 01     movabs $0x1ec273014,%rax<br>

>   1f:    00 00 00<br>

>   22:    48 89 45 f8              mov    %rax,-0x8(%rbp)<br>

>   26:    48 b8 ff ff ff ff ff     movabs $0xff7377ffffffffff,%rax<br>

>   2d:    77 73 ff<br>

>   30:    48 89 45 f0              mov    %rax,-0x10(%rbp)<br>

>   34:    48 b8 00 00 e8 89 04     movabs $0x8ac7230489e80000,%rax<br>

>   3b:    23 c7 8a<br>

>   3e:    48 89 45 e0              mov    %rax,-0x20(%rbp)<br>

>   42:    48 c7 45 e8 00 00 00     movq   $0x0,-0x18(%rbp)<br>

>   49:    00<br>

>   4a:    48 8b 7d f0              mov    -0x10(%rbp),%rdi<br>

>   4e:    48 8b 75 f8              mov    -0x8(%rbp),%rsi<br>

>   52:    48 8b 55 e0              mov    -0x20(%rbp),%rdx<br>

>   56:    48 8b 4d e8              mov    -0x18(%rbp),%rcx<br>

>   5a:    e8 00 00 00 00           callq  5f <_start+0x4f><br>

>   5f:    48 89 55 d8              mov    %rdx,-0x28(%rbp)<br>

>   63:    48 89 45 d0              mov    %rax,-0x30(%rbp)<br>

>   67:    c5 fa 6f 45 d0           vmovdqu -0x30(%rbp),%xmm0<br>

>   6c:    c5 fa 6f 4d e0           vmovdqu -0x20(%rbp),%xmm1<br>

>   71:    c5 f9 74 c1              vpcmpeqb %xmm1,%xmm0,%xmm0<br>

>   75:    c5 79 d7 c0              vpmovmskb %xmm0,%r8d<br>

>   79:    41 81 e8 ff ff 00 00     sub    $0xffff,%r8d<br>

>   80:    44 89 45 cc              mov    %r8d,-0x34(%rbp)<br>

>   84:    74 06                    je     8c <_start+0x7c><br>

>   86:    eb 00                    jmp    88 <_start+0x78><br>

>   88:    eb 00                    jmp    8a <_start+0x7a><br>

>   8a:    eb fe                    jmp    8a <_start+0x7a><br>

>   8c:    eb 00                    jmp    8e <_start+0x7e><br>

>   8e:    48 83 c4 40              add    $0x40,%rsp<br>

>   92:    5d                       pop    %rbp<br>

>   93:    c3                       retq<br>

>   94:    66 66 66 2e 0f 1f 84     data16 data16 nopw %cs:0x0(%rax,%rax,1)<br>

>   9b:    00 00 00 00 00<br>

><br>

> 00000000000000a0 <__udivti3>:<br>

>   a0:    55                       push   %rbp<br>

>   a1:    48 89 e5                 mov    %rsp,%rbp<br>

>   a4:    48 89 7d f0              mov    %rdi,-0x10(%rbp)<br>

>   a8:    48 89 75 f8              mov    %rsi,-0x8(%rbp)<br>

>   ac:    48 89 4d e8              mov    %rcx,-0x18(%rbp)<br>

>   b0:    48 89 55 e0              mov    %rdx,-0x20(%rbp)<br>

>   b4:    48 8b 45 e0              mov    -0x20(%rbp),%rax<br>

>   b8:    48 8b 55 e8              mov    -0x18(%rbp),%rdx<br>

>   bc:    5d                       pop    %rbp<br>

>   bd:    c3                       retq<br>

><br>

><br>

> ==============================<wbr>==============================<wbr>=====<br>

><br>

> And here is the assembly for the windows x86_64 version:<br>

><br>

><br>

> ==============================<wbr>==============================<wbr>=====<br>

> 0000000000000010 <_start>:<br>

>   10:    55                       push   %rbp<br>

>   11:    48 81 ec 80 00 00 00     sub    $0x80,%rsp<br>

>   18:    48 8d ac 24 80 00 00     lea    0x80(%rsp),%rbp<br>

>   1f:    00<br>

>   20:    48 b8 14 30 27 ec 01     movabs $0x1ec273014,%rax<br>

>   27:    00 00 00<br>

>   2a:    48 89 45 f8              mov    %rax,-0x8(%rbp)<br>

>   2e:    48 b8 ff ff ff ff ff     movabs $0xff7377ffffffffff,%rax<br>

>   35:    77 73 ff<br>

>   38:    48 89 45 f0              mov    %rax,-0x10(%rbp)<br>

>   3c:    48 b8 00 00 e8 89 04     movabs $0x8ac7230489e80000,%rax<br>

>   43:    23 c7 8a<br>

>   46:    48 89 45 e0              mov    %rax,-0x20(%rbp)<br>

>   4a:    48 c7 45 e8 00 00 00     movq   $0x0,-0x18(%rbp)<br>

>   51:    00<br>

>   52:    48 8b 45 f0              mov    -0x10(%rbp),%rax<br>

>   56:    48 8b 4d f8              mov    -0x8(%rbp),%rcx<br>

>   5a:    48 8b 55 e0              mov    -0x20(%rbp),%rdx<br>

>   5e:    4c 8b 45 e8              mov    -0x18(%rbp),%r8<br>

>   62:    48 89 4d c8              mov    %rcx,-0x38(%rbp)<br>

>   66:    48 89 45 c0              mov    %rax,-0x40(%rbp)<br>

>   6a:    4c 89 45 b8              mov    %r8,-0x48(%rbp)<br>

>   6e:    48 89 55 b0              mov    %rdx,-0x50(%rbp)<br>

>   72:    48 8d 4d c0              lea    -0x40(%rbp),%rcx<br>

>   76:    48 8d 55 b0              lea    -0x50(%rbp),%rdx<br>

>   7a:    e8 41 00 00 00           callq  c0 <__udivti3><br>

>   7f:    66 0f 70 c8 4e           pshufd $0x4e,%xmm0,%xmm1<br>

>   84:    66 0f d6 45 d0           movq   %xmm0,-0x30(%rbp)<br>

>   89:    66 0f d6 4d d8           movq   %xmm1,-0x28(%rbp)<br>

>   8e:    0f 10 45 d0              movups -0x30(%rbp),%xmm0<br>

>   92:    0f 10 4d e0              movups -0x20(%rbp),%xmm1<br>

>   96:    66 0f 74 c1              pcmpeqb %xmm1,%xmm0<br>

>   9a:    66 44 0f d7 c8           pmovmskb %xmm0,%r9d<br>

>   9f:    41 81 e9 ff ff 00 00     sub    $0xffff,%r9d<br>

>   a6:    44 89 4d ac              mov    %r9d,-0x54(%rbp)<br>

>   aa:    74 06                    je     b2 <_start+0xa2><br>

>   ac:    eb 00                    jmp    ae <_start+0x9e><br>

>   ae:    eb 00                    jmp    b0 <_start+0xa0><br>

>   b0:    eb fe                    jmp    b0 <_start+0xa0><br>

>   b2:    eb 00                    jmp    b4 <_start+0xa4><br>

>   b4:    48 81 c4 80 00 00 00     add    $0x80,%rsp<br>

>   bb:    5d                       pop    %rbp<br>

>   bc:    c3                       retq<br>

>   bd:    90                       nop<br>

>   be:    90                       nop<br>

>   bf:    90                       nop<br>

><br>

> 00000000000000c0 <__udivti3>:<br>

>   c0:    55                       push   %rbp<br>

>   c1:    48 83 ec 20              sub    $0x20,%rsp<br>

>   c5:    48 8d 6c 24 20           lea    0x20(%rsp),%rbp<br>

>   ca:    48 89 4d f0              mov    %rcx,-0x10(%rbp)<br>

>   ce:    48 89 55 f8              mov    %rdx,-0x8(%rbp)<br>

>   d2:    4c 89 4d e8              mov    %r9,-0x18(%rbp)<br>

>   d6:    4c 89 45 e0              mov    %r8,-0x20(%rbp)<br>

>   da:    48 8b 45 e0              mov    -0x20(%rbp),%rax<br>

>   de:    48 8b 55 e8              mov    -0x18(%rbp),%rdx<br>

>   e2:    48 83 c4 20              add    $0x20,%rsp<br>

>   e6:    5d                       pop    %rbp<br>

>   e7:    c3                       retq<br>

><br>

> ==============================<wbr>==============================<wbr>=====<br>

><br>

><br>

> Finally, my question:<br>

><br>

> What is the correct LLVM IR to represent i128 values so that it will be<br>

> compatible with the compiler-rt calls that LLVM generates? For example, what<br>

> should be the LLVM IR definition of  __udivti3?<br>

><br>

> Because even though clang/compiler-rt project generates `define i128<br>

> @__udivti3(i128, i128) #0 {`, this appears to be incorrect when run on<br>

> windows.<br>

><br>

> Thanks,<br>

> Andrew<br>

><br>

</div></div>> ______________________________<wbr>_________________<br>

> LLVM Developers mailing list<br>

> <a href="mailto:llvm-dev@lists.llvm.org">llvm-dev@lists.llvm.org</a><br>

> <a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-dev</a><br>

><br>

<span class="gmail-HOEnZb"><font color="#888888"><br>

<br>

<br>

-- <br>

With best regards, Anton Korobeynikov<br>

Department of Statistical Modelling, Saint Petersburg State University<br>

</font></span></blockquote></div><br></div></div>