[llvm-dev] windows ABI problem with i128?
Andrew Kelley via llvm-dev
llvm-dev at lists.llvm.org
Sat Apr 28 17:59:33 PDT 2018
I figured out the problem:
The definition of __udivti3 is putting the return value on the stack, like
this:
106: 48 8b 01 mov (%rcx),%rax
109: 48 8b 51 08 mov 0x8(%rcx),%rdx
However the callsite expects the result to be in %xmm0, which matches
[Microsoft's ABI](https://msdn.microsoft.com/en-us/library/7572ztz4.aspx):
a8: e8 43 00 00 00 callq f0 <__udivti3>
ad: 66 0f 70 c8 4e pshufd $0x4e,%xmm0,%xmm1
b2: 66 0f d6 45 00 movq %xmm0,0x0(%rbp)
b7: 66 0f d6 4d 08 movq %xmm1,0x8(%rbp)
So I tried using inline assembly to set xmm0:
call void asm sideeffect "", "{xmm0}"(i128 %3)
However this gives "couldn't allocate input reg for constraint '{xmm0}'"
Then I tried @llvm.write_register:
call void @llvm.write_register.i128(metadata !83, i128 %3)
!83 = !{!"xmm0\00"}
This crashed llc. Here's a bug report:
https://bugs.llvm.org/show_bug.cgi?id=37285
What's the best way to put an i128 in xmm0?
Regards,
Andrew
On Thu, Apr 26, 2018 at 11:30 AM, Andrew Kelley <superjoe30 at gmail.com>
wrote:
> On Thu, Apr 26, 2018 at 3:44 AM, Anton Korobeynikov <
> anton at korobeynikov.info> wrote:
>
>> Most probably you need to properly specify the calling convention the
>> backend is using for calling the runtime functions.
>
>
> Thanks for the tip. Can you be more specific? Are you suggesting there is
> some config parameter I can set before running TargetMachineEmitToFile?
>
> Do you know what calling convention it is trying to use at the callsite?
> Perhaps I can simply select a different convention from this list for the
> implementation of udivti3?
> http://llvm.org/docs/LangRef.html#calling-conventions
>
>> Or implement the
>> stub for udivti3 that performs the necessary argument lifting.
>>
>> I guess there is no standard ABI document describing the intended
>> calling convention here, so I'd just do what mingw64 does here and
>> make everything here compatible.
>>
>
>> On Thu, Apr 26, 2018 at 4:44 AM, Andrew Kelley via llvm-dev
>> <llvm-dev at lists.llvm.org> wrote:
>> > I'm trying to use LLVM to create compiler-rt.o on Windows. I use this
>> > command from the compiler-rt project:
>> >
>> > [nix-shell:~/downloads/llvm-project/compiler-rt]$ clang -nostdlib -S
>> > -emit-llvm lib/builtins/udivti3.c -g -target x86_64-windows
>> > -DCRT_HAS_128BIT
>> >
>> > The resulting LLVM IR is:
>> > =================================================================
>> >
>> > ; ModuleID = 'lib/builtins/udivti3.c'
>> > source_filename = "lib/builtins/udivti3.c"
>> > target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
>> > target triple = "x86_64--windows-msvc19.11.0"
>> >
>> > ; Function Attrs: noinline nounwind optnone uwtable
>> > define i128 @__udivti3(i128, i128) #0 {
>> > %3 = alloca i128, align 16
>> > %4 = alloca i128, align 16
>> > store i128 %1, i128* %3, align 16
>> > store i128 %0, i128* %4, align 16
>> > %5 = load i128, i128* %3, align 16
>> > %6 = load i128, i128* %4, align 16
>> > %7 = call i128 @__udivmodti4(i128 %6, i128 %5, i128* null)
>> > ret i128 %7
>> > }
>> >
>> > declare i128 @__udivmodti4(i128, i128, i128*) #1
>> >
>> > attributes #0 = { noinline nounwind optnone uwtable
>> > "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
>> > "less-precise-fpmad"="false" "no-frame-pointer-elim"="false"
>> > "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false"
>> > "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
>> > "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
>> > "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false"
>> > "use-soft-float"="false" }
>> > attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false"
>> > "disable-tail-calls"="false" "less-precise-fpmad"="false"
>> > "no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
>> > "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false"
>> > "no-trapping-math"="false" "stack-protector-buffer-size"="8"
>> > "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87"
>> > "unsafe-fp-math"="false" "use-soft-float"="false" }
>> >
>> > !llvm.module.flags = !{!0, !1}
>> > !llvm.ident = !{!2}
>> >
>> > !0 = !{i32 1, !"wchar_size", i32 2}
>> > !1 = !{i32 7, !"PIC Level", i32 2}
>> > !2 = !{!"clang version 6.0.0 (tags/RELEASE_600/final)"}
>> >
>> >
>> > =================================================================
>> > However I think this results in a different ABI than LLVM will use when you
>> > do i128 division. For example, here is my test case (in zig code):
>> > =================================================================
>> >
>> > pub extern "kernel32" stdcallcc fn ExitProcess(exit_code: c_uint) noreturn;
>> >
>> > export fn WinMainCRTStartup() noreturn {
>> > @setAlignStack(16);
>> > @setRuntimeSafety(false);
>> >
>> > var a: u128 = 152313999999999991610955792383;
>> > var b: u128 = 10000000000000000000;
>> > var c = a / b; // this generates a call to __udivti3
>> >
>> > if (c != b) {
>> > @breakpoint();
>> > }
>> > ExitProcess(0);
>> > }
>> >
>> > export fn __udivti3(a: u128, b: u128) u128 {
>> > @setRuntimeSafety(false);
>> > return b;
>> > }
>> >
>> >
>> > =================================================================
>> > This results in this LLVM IR:
>> > =================================================================
>> >
>> > ; ModuleID = 'test'
>> > source_filename = "test"
>> > target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
>> > target triple = "x86_64-pc-windows-msvc"
>> >
>> > %"[]u8" = type { i8*, i64 }
>> > %StackTrace = type { i64, %"[]usize" }
>> > %"[]usize" = type { i64*, i64 }
>> >
>> > ; Function Attrs: nounwind readnone speculatable
>> > declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
>> >
>> > ; Function Attrs: nobuiltin noinline noreturn nounwind uwtable
>> > alignstack(16)
>> > define void @WinMainCRTStartup() #2 !dbg !41 {
>> > Entry:
>> > %a = alloca i128, align 8
>> > %b = alloca i128, align 8
>> > %c = alloca i128, align 8
>> > store i128 152313999999999991610955792383, i128* %a, align 8, !dbg !52
>> > call void @llvm.dbg.declare(metadata i128* %a, metadata !45, metadata
>> > !DIExpression()), !dbg !52
>> > store i128 10000000000000000000, i128* %b, align 8, !dbg !53
>> > call void @llvm.dbg.declare(metadata i128* %b, metadata !48, metadata
>> > !DIExpression()), !dbg !53
>> > %0 = load i128, i128* %a, align 8, !dbg !54
>> > %1 = load i128, i128* %b, align 8, !dbg !55
>> > %2 = udiv i128 %0, %1, !dbg !56
>> > store i128 %2, i128* %c, align 8, !dbg !57
>> > call void @llvm.dbg.declare(metadata i128* %c, metadata !50, metadata
>> > !DIExpression()), !dbg !57
>> > %3 = load i128, i128* %c, align 8, !dbg !58
>> > %4 = load i128, i128* %b, align 8, !dbg !60
>> > %5 = icmp ne i128 %3, %4, !dbg !61
>> > br i1 %5, label %Then, label %Else, !dbg !61
>> >
>> > Then: ; preds = %Entry
>> > call void @llvm.debugtrap(), !dbg !62
>> > br label %EndIf, !dbg !64
>> >
>> > Else: ; preds = %Entry
>> > br label %EndIf, !dbg !64
>> >
>> > EndIf: ; preds = %Else, %Then
>> > call void @ExitProcess(i32 0), !dbg !65
>> > unreachable, !dbg !65
>> > }
>> >
>> > ; Function Attrs: nounwind
>> > declare void @llvm.debugtrap() #3
>> >
>> > ; Function Attrs: nobuiltin noreturn nounwind uwtable
>> > declare void @ExitProcess(i32) #0
>> >
>> > ; Function Attrs: nobuiltin nounwind uwtable
>> > define i128 @__udivti3(i128, i128) #4 !dbg !66 {
>> > Entry:
>> > %a = alloca i128, align 8
>> > %b = alloca i128, align 8
>> > store i128 %0, i128* %a, align 8
>> > call void @llvm.dbg.declare(metadata i128* %a, metadata !70, metadata
>> > !DIExpression()), !dbg !73
>> > store i128 %1, i128* %b, align 8
>> > call void @llvm.dbg.declare(metadata i128* %b, metadata !71, metadata
>> > !DIExpression()), !dbg !74
>> > %2 = load i128, i128* %b, align 8, !dbg !75
>> > ret i128 %2, !dbg !78
>> > }
>> >
>> > ; Function Attrs: nounwind
>> > declare void @llvm.stackprotector(i8*, i8**) #3
>> >
>> > attributes #0 = { nobuiltin noreturn nounwind uwtable
>> > "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
>> > attributes #1 = { nounwind readnone speculatable }
>> > attributes #2 = { nobuiltin noinline noreturn nounwind uwtable alignstack=16
>> > "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
>> > attributes #3 = { nounwind }
>> > attributes #4 = { nobuiltin nounwind uwtable "no-frame-pointer-elim"="true"
>> > "no-frame-pointer-elim-non-leaf" }
>> >
>> > !llvm.module.flags = !{!0}
>> > !llvm.dbg.cu = !{!1}
>> >
>> > =================================================================
>> >
>> > When I link this (with link.exe or LLD, it does not matter):
>> > link.exe /OUT:test.exe /ENTRY:WinMainCRTStartup test.obj /subsystem:console kernel32.lib /nologo
>> >
>> > And run it, it triggers the breakpoint.
>> >
>> > Meanwhile on linux, this test passes.
>> >
>> > I suspect it may be a calling convention issue. Here is the assembly for the
>> > linux x86_64 version:
>> >
>> >
>> > =================================================================
>> > 0000000000000010 <_start>:
>> > 10: 55 push %rbp
>> > 11: 48 89 e5 mov %rsp,%rbp
>> > 14: 48 83 ec 40 sub $0x40,%rsp
>> > 18: 48 b8 14 30 27 ec 01 movabs $0x1ec273014,%rax
>> > 1f: 00 00 00
>> > 22: 48 89 45 f8 mov %rax,-0x8(%rbp)
>> > 26: 48 b8 ff ff ff ff ff movabs $0xff7377ffffffffff,%rax
>> > 2d: 77 73 ff
>> > 30: 48 89 45 f0 mov %rax,-0x10(%rbp)
>> > 34: 48 b8 00 00 e8 89 04 movabs $0x8ac7230489e80000,%rax
>> > 3b: 23 c7 8a
>> > 3e: 48 89 45 e0 mov %rax,-0x20(%rbp)
>> > 42: 48 c7 45 e8 00 00 00 movq $0x0,-0x18(%rbp)
>> > 49: 00
>> > 4a: 48 8b 7d f0 mov -0x10(%rbp),%rdi
>> > 4e: 48 8b 75 f8 mov -0x8(%rbp),%rsi
>> > 52: 48 8b 55 e0 mov -0x20(%rbp),%rdx
>> > 56: 48 8b 4d e8 mov -0x18(%rbp),%rcx
>> > 5a: e8 00 00 00 00 callq 5f <_start+0x4f>
>> > 5f: 48 89 55 d8 mov %rdx,-0x28(%rbp)
>> > 63: 48 89 45 d0 mov %rax,-0x30(%rbp)
>> > 67: c5 fa 6f 45 d0 vmovdqu -0x30(%rbp),%xmm0
>> > 6c: c5 fa 6f 4d e0 vmovdqu -0x20(%rbp),%xmm1
>> > 71: c5 f9 74 c1 vpcmpeqb %xmm1,%xmm0,%xmm0
>> > 75: c5 79 d7 c0 vpmovmskb %xmm0,%r8d
>> > 79: 41 81 e8 ff ff 00 00 sub $0xffff,%r8d
>> > 80: 44 89 45 cc mov %r8d,-0x34(%rbp)
>> > 84: 74 06 je 8c <_start+0x7c>
>> > 86: eb 00 jmp 88 <_start+0x78>
>> > 88: eb 00 jmp 8a <_start+0x7a>
>> > 8a: eb fe jmp 8a <_start+0x7a>
>> > 8c: eb 00 jmp 8e <_start+0x7e>
>> > 8e: 48 83 c4 40 add $0x40,%rsp
>> > 92: 5d pop %rbp
>> > 93: c3 retq
>> > 94: 66 66 66 2e 0f 1f 84 data16 data16 nopw %cs:0x0(%rax,%rax,1)
>> > 9b: 00 00 00 00 00
>> >
>> > 00000000000000a0 <__udivti3>:
>> > a0: 55 push %rbp
>> > a1: 48 89 e5 mov %rsp,%rbp
>> > a4: 48 89 7d f0 mov %rdi,-0x10(%rbp)
>> > a8: 48 89 75 f8 mov %rsi,-0x8(%rbp)
>> > ac: 48 89 4d e8 mov %rcx,-0x18(%rbp)
>> > b0: 48 89 55 e0 mov %rdx,-0x20(%rbp)
>> > b4: 48 8b 45 e0 mov -0x20(%rbp),%rax
>> > b8: 48 8b 55 e8 mov -0x18(%rbp),%rdx
>> > bc: 5d pop %rbp
>> > bd: c3 retq
>> >
>> >
>> > =================================================================
>> >
>> > And here is the assembly for the windows x86_64 version:
>> >
>> >
>> > =================================================================
>> > 0000000000000010 <_start>:
>> > 10: 55 push %rbp
>> > 11: 48 81 ec 80 00 00 00 sub $0x80,%rsp
>> > 18: 48 8d ac 24 80 00 00 lea 0x80(%rsp),%rbp
>> > 1f: 00
>> > 20: 48 b8 14 30 27 ec 01 movabs $0x1ec273014,%rax
>> > 27: 00 00 00
>> > 2a: 48 89 45 f8 mov %rax,-0x8(%rbp)
>> > 2e: 48 b8 ff ff ff ff ff movabs $0xff7377ffffffffff,%rax
>> > 35: 77 73 ff
>> > 38: 48 89 45 f0 mov %rax,-0x10(%rbp)
>> > 3c: 48 b8 00 00 e8 89 04 movabs $0x8ac7230489e80000,%rax
>> > 43: 23 c7 8a
>> > 46: 48 89 45 e0 mov %rax,-0x20(%rbp)
>> > 4a: 48 c7 45 e8 00 00 00 movq $0x0,-0x18(%rbp)
>> > 51: 00
>> > 52: 48 8b 45 f0 mov -0x10(%rbp),%rax
>> > 56: 48 8b 4d f8 mov -0x8(%rbp),%rcx
>> > 5a: 48 8b 55 e0 mov -0x20(%rbp),%rdx
>> > 5e: 4c 8b 45 e8 mov -0x18(%rbp),%r8
>> > 62: 48 89 4d c8 mov %rcx,-0x38(%rbp)
>> > 66: 48 89 45 c0 mov %rax,-0x40(%rbp)
>> > 6a: 4c 89 45 b8 mov %r8,-0x48(%rbp)
>> > 6e: 48 89 55 b0 mov %rdx,-0x50(%rbp)
>> > 72: 48 8d 4d c0 lea -0x40(%rbp),%rcx
>> > 76: 48 8d 55 b0 lea -0x50(%rbp),%rdx
>> > 7a: e8 41 00 00 00 callq c0 <__udivti3>
>> > 7f: 66 0f 70 c8 4e pshufd $0x4e,%xmm0,%xmm1
>> > 84: 66 0f d6 45 d0 movq %xmm0,-0x30(%rbp)
>> > 89: 66 0f d6 4d d8 movq %xmm1,-0x28(%rbp)
>> > 8e: 0f 10 45 d0 movups -0x30(%rbp),%xmm0
>> > 92: 0f 10 4d e0 movups -0x20(%rbp),%xmm1
>> > 96: 66 0f 74 c1 pcmpeqb %xmm1,%xmm0
>> > 9a: 66 44 0f d7 c8 pmovmskb %xmm0,%r9d
>> > 9f: 41 81 e9 ff ff 00 00 sub $0xffff,%r9d
>> > a6: 44 89 4d ac mov %r9d,-0x54(%rbp)
>> > aa: 74 06 je b2 <_start+0xa2>
>> > ac: eb 00 jmp ae <_start+0x9e>
>> > ae: eb 00 jmp b0 <_start+0xa0>
>> > b0: eb fe jmp b0 <_start+0xa0>
>> > b2: eb 00 jmp b4 <_start+0xa4>
>> > b4: 48 81 c4 80 00 00 00 add $0x80,%rsp
>> > bb: 5d pop %rbp
>> > bc: c3 retq
>> > bd: 90 nop
>> > be: 90 nop
>> > bf: 90 nop
>> >
>> > 00000000000000c0 <__udivti3>:
>> > c0: 55 push %rbp
>> > c1: 48 83 ec 20 sub $0x20,%rsp
>> > c5: 48 8d 6c 24 20 lea 0x20(%rsp),%rbp
>> > ca: 48 89 4d f0 mov %rcx,-0x10(%rbp)
>> > ce: 48 89 55 f8 mov %rdx,-0x8(%rbp)
>> > d2: 4c 89 4d e8 mov %r9,-0x18(%rbp)
>> > d6: 4c 89 45 e0 mov %r8,-0x20(%rbp)
>> > da: 48 8b 45 e0 mov -0x20(%rbp),%rax
>> > de: 48 8b 55 e8 mov -0x18(%rbp),%rdx
>> > e2: 48 83 c4 20 add $0x20,%rsp
>> > e6: 5d pop %rbp
>> > e7: c3 retq
>> >
>> > =================================================================
>> >
>> >
>> > Finally, my question:
>> >
>> > What is the correct LLVM IR to represent i128 values so that it will be
>> > compatible with the compiler-rt calls that LLVM generates? For example, what
>> > should be the LLVM IR definition of __udivti3?
>> >
>> > Because even though the clang/compiler-rt project generates `define i128
>> > @__udivti3(i128, i128) #0 {`, this appears to be incorrect when run on
>> > windows.
>> >
>> > Thanks,
>> > Andrew
>> >
>> > _______________________________________________
>> > LLVM Developers mailing list
>> > llvm-dev at lists.llvm.org
>> > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>> >
>>
>>
>>
>> --
>> With best regards, Anton Korobeynikov
>> Department of Statistical Modelling, Saint Petersburg State University
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20180428/9dc5bcd0/attachment.html>
More information about the llvm-dev
mailing list