[llvm-dev] windows ABI problem with i128?

Thu Apr 26 08:30:41 PDT 2018

On Thu, Apr 26, 2018 at 3:44 AM, Anton Korobeynikov
<anton at korobeynikov.info> wrote:

> Most probably you need to properly specify the calling convention the
> backend is using for calling the runtime functions.

Thanks for the tip. Can you be more specific? Are you suggesting there is
some config parameter I can set before running TargetMachineEmitToFile?

Do you know what calling convention it is trying to use at the callsite?
Perhaps I can simply select a different convention from this list for the
implementation of udivti3?
http://llvm.org/docs/LangRef.html#calling-conventions

> Or implement the stub for udivti3 that performs the necessary argument
> lifting.
>
> I guess there is no standard ABI document describing the intended
> calling convention here, so I'd just do what mingw64 does here and
> make everything here compatible.
>

> On Thu, Apr 26, 2018 at 4:44 AM, Andrew Kelley via llvm-dev
> <llvm-dev at lists.llvm.org> wrote:
> > I'm trying to use LLVM to create compiler-rt.o on Windows. I use this
> > command from the compiler-rt project:
> >
> > [nix-shell:~/downloads/llvm-project/compiler-rt]$ clang -nostdlib  -S
> > -emit-llvm lib/builtins/udivti3.c  -g -target x86_64-windows
> > -DCRT_HAS_128BIT
> >
> > The resulting LLVM IR is:
> > =================================================================
> >
> > ; ModuleID = 'lib/builtins/udivti3.c'
> > source_filename = "lib/builtins/udivti3.c"
> > target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
> > target triple = "x86_64--windows-msvc19.11.0"
> >
> > ; Function Attrs: noinline nounwind optnone uwtable
> > define i128 @__udivti3(i128, i128) #0 {
> >   %3 = alloca i128, align 16
> >   %4 = alloca i128, align 16
> >   store i128 %1, i128* %3, align 16
> >   store i128 %0, i128* %4, align 16
> >   %5 = load i128, i128* %3, align 16
> >   %6 = load i128, i128* %4, align 16
> >   %7 = call i128 @__udivmodti4(i128 %6, i128 %5, i128* null)
> >   ret i128 %7
> > }
> >
> > declare i128 @__udivmodti4(i128, i128, i128*) #1
> >
> > attributes #0 = { noinline nounwind optnone uwtable
> > "correctly-rounded-divide-sqrt-fp-math"="false"
> > "disable-tail-calls"="false"
> > "less-precise-fpmad"="false" "no-frame-pointer-elim"="false"
> > "no-infs-fp-math"="false" "no-jump-tables"="false"
> > "no-nans-fp-math"="false"
> > "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
> > "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
> > "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false"
> > "use-soft-float"="false" }
> > attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false"
> > "disable-tail-calls"="false" "less-precise-fpmad"="false"
> > "no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
> > "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false"
> > "no-trapping-math"="false" "stack-protector-buffer-size"="8"
> > "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
> > "unsafe-fp-math"="false" "use-soft-float"="false" }
> >
> > !llvm.module.flags = !{!0, !1}
> > !llvm.ident = !{!2}
> >
> > !0 = !{i32 1, !"wchar_size", i32 2}
> > !1 = !{i32 7, !"PIC Level", i32 2}
> > !2 = !{!"clang version 6.0.0 (tags/RELEASE_600/final)"}
> >
> >
> > =================================================================
> > However, I think this results in a different ABI than the one LLVM will
> > use when you do i128 division. For example, here is my test case (in zig
> > code):
> > =================================================================
> >
> > pub extern "kernel32" stdcallcc fn ExitProcess(exit_code: c_uint) noreturn;
> >
> > export fn WinMainCRTStartup() noreturn {
> >     @setAlignStack(16);
> >     @setRuntimeSafety(false);
> >
> >     var a: u128 = 152313999999999991610955792383;
> >     var b: u128 = 10000000000000000000;
> >     var c = a / b; // this generates a call to __udivti3
> >
> >     if (c != b) {
> >         @breakpoint();
> >     }
> >     ExitProcess(0);
> > }
> >
> > export fn __udivti3(a: u128, b: u128) u128 {
> >     @setRuntimeSafety(false);
> >     return b;
> > }
> >
> >
> > =================================================================
> > This results in this LLVM IR:
> > =================================================================
> >
> > ; ModuleID = 'test'
> > source_filename = "test"
> > target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
> > target triple = "x86_64-pc-windows-msvc"
> >
> > %"[]u8" = type { i8*, i64 }
> > %StackTrace = type { i64, %"[]usize" }
> > %"[]usize" = type { i64*, i64 }
> >
> > ; Function Attrs: nounwind readnone speculatable
> > declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
> >
> > ; Function Attrs: nobuiltin noinline noreturn nounwind uwtable alignstack(16)
> > define void @WinMainCRTStartup() #2 !dbg !41 {
> > Entry:
> >   %a = alloca i128, align 8
> >   %b = alloca i128, align 8
> >   %c = alloca i128, align 8
> >   store i128 152313999999999991610955792383, i128* %a, align 8, !dbg !52
> >   call void @llvm.dbg.declare(metadata i128* %a, metadata !45, metadata
> > !DIExpression()), !dbg !52
> >   store i128 10000000000000000000, i128* %b, align 8, !dbg !53
> >   call void @llvm.dbg.declare(metadata i128* %b, metadata !48, metadata
> > !DIExpression()), !dbg !53
> >   %0 = load i128, i128* %a, align 8, !dbg !54
> >   %1 = load i128, i128* %b, align 8, !dbg !55
> >   %2 = udiv i128 %0, %1, !dbg !56
> >   store i128 %2, i128* %c, align 8, !dbg !57
> >   call void @llvm.dbg.declare(metadata i128* %c, metadata !50, metadata
> > !DIExpression()), !dbg !57
> >   %3 = load i128, i128* %c, align 8, !dbg !58
> >   %4 = load i128, i128* %b, align 8, !dbg !60
> >   %5 = icmp ne i128 %3, %4, !dbg !61
> >   br i1 %5, label %Then, label %Else, !dbg !61
> >
> > Then:                                             ; preds = %Entry
> >   call void @llvm.debugtrap(), !dbg !62
> >   br label %EndIf, !dbg !64
> >
> > Else:                                             ; preds = %Entry
> >   br label %EndIf, !dbg !64
> >
> > EndIf:                                            ; preds = %Else, %Then
> >   call void @ExitProcess(i32 0), !dbg !65
> >   unreachable, !dbg !65
> > }
> >
> > ; Function Attrs: nounwind
> > declare void @llvm.debugtrap() #3
> >
> > ; Function Attrs: nobuiltin noreturn nounwind uwtable
> > declare void @ExitProcess(i32) #0
> >
> > ; Function Attrs: nobuiltin nounwind uwtable
> > define i128 @__udivti3(i128, i128) #4 !dbg !66 {
> > Entry:
> >   %a = alloca i128, align 8
> >   %b = alloca i128, align 8
> >   store i128 %0, i128* %a, align 8
> >   call void @llvm.dbg.declare(metadata i128* %a, metadata !70, metadata
> > !DIExpression()), !dbg !73
> >   store i128 %1, i128* %b, align 8
> >   call void @llvm.dbg.declare(metadata i128* %b, metadata !71, metadata
> > !DIExpression()), !dbg !74
> >   %2 = load i128, i128* %b, align 8, !dbg !75
> >   ret i128 %2, !dbg !78
> > }
> >
> > ; Function Attrs: nounwind
> > declare void @llvm.stackprotector(i8*, i8**) #3
> >
> > attributes #0 = { nobuiltin noreturn nounwind uwtable
> > "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
> > attributes #1 = { nounwind readnone speculatable }
> > attributes #2 = { nobuiltin noinline noreturn nounwind uwtable
> > alignstack=16
> > "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
> > attributes #3 = { nounwind }
> > attributes #4 = { nobuiltin nounwind uwtable
> > "no-frame-pointer-elim"="true"
> > "no-frame-pointer-elim-non-leaf" }
> >
> > !llvm.module.flags = !{!0}
> > !llvm.dbg.cu = !{!1}
> >
> > =================================================================
> >
> > When I link this (with link.exe or LLD, it does not matter):
> > link.exe /OUT:test.exe /ENTRY:WinMainCRTStartup test.obj /subsystem:console kernel32.lib /nologo
> >
> > And run it, it triggers the breakpoint.
> >
> > Meanwhile on linux, this test passes.
> >
> > I suspect it may be a calling convention issue. Here is the assembly for
> > the linux x86_64 version:
> >
> >
> > =================================================================
> > 0000000000000010 <_start>:
> >   10:    55                       push   %rbp
> >   11:    48 89 e5                 mov    %rsp,%rbp
> >   14:    48 83 ec 40              sub    $0x40,%rsp
> >   18:    48 b8 14 30 27 ec 01     movabs $0x1ec273014,%rax
> >   1f:    00 00 00
> >   22:    48 89 45 f8              mov    %rax,-0x8(%rbp)
> >   26:    48 b8 ff ff ff ff ff     movabs $0xff7377ffffffffff,%rax
> >   2d:    77 73 ff
> >   30:    48 89 45 f0              mov    %rax,-0x10(%rbp)
> >   34:    48 b8 00 00 e8 89 04     movabs $0x8ac7230489e80000,%rax
> >   3b:    23 c7 8a
> >   3e:    48 89 45 e0              mov    %rax,-0x20(%rbp)
> >   42:    48 c7 45 e8 00 00 00     movq   $0x0,-0x18(%rbp)
> >   49:    00
> >   4a:    48 8b 7d f0              mov    -0x10(%rbp),%rdi
> >   4e:    48 8b 75 f8              mov    -0x8(%rbp),%rsi
> >   52:    48 8b 55 e0              mov    -0x20(%rbp),%rdx
> >   56:    48 8b 4d e8              mov    -0x18(%rbp),%rcx
> >   5a:    e8 00 00 00 00           callq  5f <_start+0x4f>
> >   5f:    48 89 55 d8              mov    %rdx,-0x28(%rbp)
> >   63:    48 89 45 d0              mov    %rax,-0x30(%rbp)
> >   67:    c5 fa 6f 45 d0           vmovdqu -0x30(%rbp),%xmm0
> >   6c:    c5 fa 6f 4d e0           vmovdqu -0x20(%rbp),%xmm1
> >   71:    c5 f9 74 c1              vpcmpeqb %xmm1,%xmm0,%xmm0
> >   75:    c5 79 d7 c0              vpmovmskb %xmm0,%r8d
> >   79:    41 81 e8 ff ff 00 00     sub    $0xffff,%r8d
> >   80:    44 89 45 cc              mov    %r8d,-0x34(%rbp)
> >   84:    74 06                    je     8c <_start+0x7c>
> >   86:    eb 00                    jmp    88 <_start+0x78>
> >   88:    eb 00                    jmp    8a <_start+0x7a>
> >   8a:    eb fe                    jmp    8a <_start+0x7a>
> >   8c:    eb 00                    jmp    8e <_start+0x7e>
> >   8e:    48 83 c4 40              add    $0x40,%rsp
> >   92:    5d                       pop    %rbp
> >   93:    c3                       retq
> >   94:    66 66 66 2e 0f 1f 84     data16 data16 nopw %cs:0x0(%rax,%rax,1)
> >   9b:    00 00 00 00 00
> >
> > 00000000000000a0 <__udivti3>:
> >   a0:    55                       push   %rbp
> >   a1:    48 89 e5                 mov    %rsp,%rbp
> >   a4:    48 89 7d f0              mov    %rdi,-0x10(%rbp)
> >   a8:    48 89 75 f8              mov    %rsi,-0x8(%rbp)
> >   ac:    48 89 4d e8              mov    %rcx,-0x18(%rbp)
> >   b0:    48 89 55 e0              mov    %rdx,-0x20(%rbp)
> >   b4:    48 8b 45 e0              mov    -0x20(%rbp),%rax
> >   b8:    48 8b 55 e8              mov    -0x18(%rbp),%rdx
> >   bc:    5d                       pop    %rbp
> >   bd:    c3                       retq
> >
> >
> > =================================================================
> >
> > And here is the assembly for the windows x86_64 version:
> >
> >
> > =================================================================
> > 0000000000000010 <_start>:
> >   10:    55                       push   %rbp
> >   11:    48 81 ec 80 00 00 00     sub    $0x80,%rsp
> >   18:    48 8d ac 24 80 00 00     lea    0x80(%rsp),%rbp
> >   1f:    00
> >   20:    48 b8 14 30 27 ec 01     movabs $0x1ec273014,%rax
> >   27:    00 00 00
> >   2a:    48 89 45 f8              mov    %rax,-0x8(%rbp)
> >   2e:    48 b8 ff ff ff ff ff     movabs $0xff7377ffffffffff,%rax
> >   35:    77 73 ff
> >   38:    48 89 45 f0              mov    %rax,-0x10(%rbp)
> >   3c:    48 b8 00 00 e8 89 04     movabs $0x8ac7230489e80000,%rax
> >   43:    23 c7 8a
> >   46:    48 89 45 e0              mov    %rax,-0x20(%rbp)
> >   4a:    48 c7 45 e8 00 00 00     movq   $0x0,-0x18(%rbp)
> >   51:    00
> >   52:    48 8b 45 f0              mov    -0x10(%rbp),%rax
> >   56:    48 8b 4d f8              mov    -0x8(%rbp),%rcx
> >   5a:    48 8b 55 e0              mov    -0x20(%rbp),%rdx
> >   5e:    4c 8b 45 e8              mov    -0x18(%rbp),%r8
> >   62:    48 89 4d c8              mov    %rcx,-0x38(%rbp)
> >   66:    48 89 45 c0              mov    %rax,-0x40(%rbp)
> >   6a:    4c 89 45 b8              mov    %r8,-0x48(%rbp)
> >   6e:    48 89 55 b0              mov    %rdx,-0x50(%rbp)
> >   72:    48 8d 4d c0              lea    -0x40(%rbp),%rcx
> >   76:    48 8d 55 b0              lea    -0x50(%rbp),%rdx
> >   7a:    e8 41 00 00 00           callq  c0 <__udivti3>
> >   7f:    66 0f 70 c8 4e           pshufd $0x4e,%xmm0,%xmm1
> >   84:    66 0f d6 45 d0           movq   %xmm0,-0x30(%rbp)
> >   89:    66 0f d6 4d d8           movq   %xmm1,-0x28(%rbp)
> >   8e:    0f 10 45 d0              movups -0x30(%rbp),%xmm0
> >   92:    0f 10 4d e0              movups -0x20(%rbp),%xmm1
> >   96:    66 0f 74 c1              pcmpeqb %xmm1,%xmm0
> >   9a:    66 44 0f d7 c8           pmovmskb %xmm0,%r9d
> >   9f:    41 81 e9 ff ff 00 00     sub    $0xffff,%r9d
> >   a6:    44 89 4d ac              mov    %r9d,-0x54(%rbp)
> >   aa:    74 06                    je     b2 <_start+0xa2>
> >   ac:    eb 00                    jmp    ae <_start+0x9e>
> >   ae:    eb 00                    jmp    b0 <_start+0xa0>
> >   b0:    eb fe                    jmp    b0 <_start+0xa0>
> >   b2:    eb 00                    jmp    b4 <_start+0xa4>
> >   b4:    48 81 c4 80 00 00 00     add    $0x80,%rsp
> >   bb:    5d                       pop    %rbp
> >   bc:    c3                       retq
> >   bd:    90                       nop
> >   be:    90                       nop
> >   bf:    90                       nop
> >
> > 00000000000000c0 <__udivti3>:
> >   c0:    55                       push   %rbp
> >   c1:    48 83 ec 20              sub    $0x20,%rsp
> >   c5:    48 8d 6c 24 20           lea    0x20(%rsp),%rbp
> >   ca:    48 89 4d f0              mov    %rcx,-0x10(%rbp)
> >   ce:    48 89 55 f8              mov    %rdx,-0x8(%rbp)
> >   d2:    4c 89 4d e8              mov    %r9,-0x18(%rbp)
> >   d6:    4c 89 45 e0              mov    %r8,-0x20(%rbp)
> >   da:    48 8b 45 e0              mov    -0x20(%rbp),%rax
> >   de:    48 8b 55 e8              mov    -0x18(%rbp),%rdx
> >   e2:    48 83 c4 20              add    $0x20,%rsp
> >   e6:    5d                       pop    %rbp
> >   e7:    c3                       retq
> >
> > =================================================================
> >
> >
> > Finally, my question:
> >
> > What is the correct LLVM IR to represent i128 values so that it will be
> > compatible with the compiler-rt calls that LLVM generates? For example,
> > what should be the LLVM IR definition of __udivti3?
> >
> > Because even though clang/compiler-rt project generates `define i128
> > @__udivti3(i128, i128) #0 {`, this appears to be incorrect when run on
> > windows.
> >
> > Thanks,
> > Andrew
> >
> > _______________________________________________
> > LLVM Developers mailing list
> > llvm-dev at lists.llvm.org
> > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
> >
>
>
>
> --
> With best regards, Anton Korobeynikov
> Department of Statistical Modelling, Saint Petersburg State University
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20180426/b98506cf/attachment.html>