<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
</head>
<body bgcolor="#FFFFFF" text="#000000">
<br>
<br>
<div class="moz-cite-prefix">On 06/23/2016 12:56 PM, Craig Topper
wrote:<br>
</div>
<blockquote
cite="mid:CAF7ks-NGGiAfeSwh3foLEZPrvY3v_o0J0_wV2fQVJAuiGCP4iQ@mail.gmail.com"
type="cite">
<div dir="ltr">Can you check what value "getHostCPUName" returned?</div>
</blockquote>
getHostCPUName() = skylake<br>
<blockquote
cite="mid:CAF7ks-NGGiAfeSwh3foLEZPrvY3v_o0J0_wV2fQVJAuiGCP4iQ@mail.gmail.com"
type="cite">
<div class="gmail_extra"><br>
<div class="gmail_quote">On Thu, Jun 23, 2016 at 9:53 AM, Frank
Winter via llvm-dev <span dir="ltr">&lt;<a
moz-do-not-send="true"
href="mailto:llvm-dev@lists.llvm.org" target="_blank">llvm-dev@lists.llvm.org</a>&gt;</span>
wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0
.8ex;border-left:1px #ccc solid;padding-left:1ex">With LLVM
3.8 the JIT compiler engine generates an AVX512 instruction
although I target an 'avx2' CPU (Intel Core i7).<br>
I just downloaded the most recent 3.8 and still it happens.<br>
<br>
It happens with this input module:<br>
<br>
<br>
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"<br>
<br>
define void @module_cFFEMJ(i64 %lo, i64 %hi, i64 %myId, i1
%ordered, i64 %start, i32* noalias align 32 %arg0, i32*
noalias align 32 %arg1) {<br>
entrypoint:<br>
%0 = add nsw i64 %lo, %start<br>
%1 = add nsw i64 %hi, %start<br>
%2 = select i1 %ordered, i64 %0, i64 %lo<br>
%3 = select i1 %ordered, i64 %1, i64 %hi<br>
%4 = sdiv i64 %2, 4<br>
%5 = sdiv i64 %3, 4<br>
%6 = bitcast i32* %arg1 to i64*<br>
%7 = load i64, i64* %6, align 32<br>
%8 = trunc i64 %7 to i32<br>
%9 = getelementptr i32, i32* %arg1, i64 1<br>
%10 = lshr i64 %7, 32<br>
%11 = trunc i64 %10 to i32<br>
%12 = getelementptr i32, i32* %arg1, i64 2<br>
%13 = bitcast i32* %12 to i64*<br>
%14 = load i64, i64* %13, align 8<br>
%15 = trunc i64 %14 to i32<br>
%16 = getelementptr i32, i32* %arg1, i64 3<br>
%17 = lshr i64 %14, 32<br>
%18 = trunc i64 %17 to i32<br>
br label %L5<br>
<br>
L5: ; preds =
%L5, %entrypoint<br>
%19 = phi i64 [ %32, %L5 ], [ %4, %entrypoint ]<br>
%20 = shl i64 %19, 4<br>
%21 = or i64 %20, 4<br>
%22 = or i64 %20, 8<br>
%23 = or i64 %20, 12<br>
%broadcast.splatinsert9 = insertelement &lt;4 x i32&gt;
undef, i32 %8, i32 0<br>
%broadcast.splat10 = shufflevector &lt;4 x i32&gt;
%broadcast.splatinsert9, &lt;4 x i32&gt; undef, &lt;4 x
i32&gt; zeroinitializer<br>
%broadcast.splatinsert11 = insertelement &lt;4 x i32&gt;
undef, i32 %11, i32 0<br>
%broadcast.splat12 = shufflevector &lt;4 x i32&gt;
%broadcast.splatinsert11, &lt;4 x i32&gt; undef, &lt;4 x
i32&gt; zeroinitializer<br>
%broadcast.splatinsert13 = insertelement &lt;4 x i32&gt;
undef, i32 %15, i32 0<br>
%broadcast.splat14 = shufflevector &lt;4 x i32&gt;
%broadcast.splatinsert13, &lt;4 x i32&gt; undef, &lt;4 x
i32&gt; zeroinitializer<br>
%broadcast.splatinsert15 = insertelement &lt;4 x i32&gt;
undef, i32 %18, i32 0<br>
%broadcast.splat16 = shufflevector &lt;4 x i32&gt;
%broadcast.splatinsert15, &lt;4 x i32&gt; undef, &lt;4 x
i32&gt; zeroinitializer<br>
%24 = getelementptr i32, i32* %arg0, i64 %20<br>
%25 = bitcast i32* %24 to &lt;4 x i32&gt;*<br>
store &lt;4 x i32&gt; %broadcast.splat10, &lt;4 x i32&gt;*
%25, align 16<br>
%26 = getelementptr i32, i32* %arg0, i64 %21<br>
%27 = bitcast i32* %26 to &lt;4 x i32&gt;*<br>
store &lt;4 x i32&gt; %broadcast.splat12, &lt;4 x i32&gt;*
%27, align 16<br>
%28 = getelementptr i32, i32* %arg0, i64 %22<br>
%29 = bitcast i32* %28 to &lt;4 x i32&gt;*<br>
store &lt;4 x i32&gt; %broadcast.splat14, &lt;4 x i32&gt;*
%29, align 16<br>
%30 = getelementptr i32, i32* %arg0, i64 %23<br>
%31 = bitcast i32* %30 to &lt;4 x i32&gt;*<br>
store &lt;4 x i32&gt; %broadcast.splat16, &lt;4 x i32&gt;*
%31, align 16<br>
%32 = add nsw i64 %19, 1<br>
%33 = icmp slt i64 %32, %5<br>
br i1 %33, label %L5, label %L6<br>
<br>
L6: ; preds =
%L5<br>
ret void<br>
}<br>
<br>
<br>
The following code lines show how I call the JIT compiler.
('Mod' is pointing to the module).<br>
<br>
llvm::EngineBuilder
engineBuilder(std::move(std::unique_ptr&lt;llvm::Module&gt;(Mod)));<br>
engineBuilder.setMCPU(llvm::sys::getHostCPUName());<br>
engineBuilder.setEngineKind(llvm::EngineKind::JIT);<br>
engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive);<br>
engineBuilder.setErrorStr(&mcjit_error);<br>
<br>
llvm::TargetOptions targetOptions;<br>
targetOptions.AllowFPOpFusion = llvm::FPOpFusion::Fast;<br>
engineBuilder.setTargetOptions( targetOptions );<br>
<br>
TheExecutionEngine = engineBuilder.create();<br>
<br>
targetMachine = engineBuilder.selectTarget();<br>
Mod->setDataLayout( targetMachine->createDataLayout()
);<br>
<br>
TheExecutionEngine->finalizeObject(); // MCJIT<br>
fptr_mainFunc_extern =
TheExecutionEngine->getPointerToFunction( mainFunc_extern
);<br>
<br>
<br>
When calling the function an 'illegal instruction' is
raised.<br>
Looking at the assembler reveals an AVX512 instruction which
shouldn't be there.<br>
<br>
Assembly:<br>
.text<br>
.file "module"<br>
.globl main<br>
.align 16, 0x90<br>
.type main,@function<br>
main:<br>
.cfi_startproc<br>
movq 8(%rsp), %r10<br>
leaq (%rdi,%r8), %rdx<br>
addq %rsi, %r8<br>
testb $1, %cl<br>
cmoveq %rdi, %rdx<br>
cmoveq %rsi, %r8<br>
movq %rdx, %rax<br>
sarq $63, %rax<br>
shrq $62, %rax<br>
addq %rdx, %rax<br>
sarq $2, %rax<br>
movq %r8, %rcx<br>
sarq $63, %rcx<br>
shrq $62, %rcx<br>
addq %r8, %rcx<br>
sarq $2, %rcx<br>
movq (%r10), %r8<br>
movq 8(%r10), %r10<br>
movq %r8, %rdi<br>
shrq $32, %rdi<br>
movq %r10, %rsi<br>
shrq $32, %rsi<br>
movq %rax, %rdx<br>
shlq $6, %rdx<br>
leaq 48(%rdx,%r9), %rdx<br>
.align 16, 0x90<br>
.LBB0_1:<br>
vmovd %r8d, %xmm0<br>
vpbroadcastd %xmm0, %xmm0<br>
vmovd %edi, %xmm1<br>
vpbroadcastd %xmm1, %xmm1<br>
vmovd %r10d, %xmm2<br>
vpbroadcastd %xmm2, %xmm2<br>
vmovd %esi, %xmm3<br>
vpbroadcastd %xmm3, %xmm3<br>
vmovdqa32 %xmm0, -48(%rdx)<br>
vmovdqa32 %xmm1, -32(%rdx)<br>
vmovdqa32 %xmm2, -16(%rdx)<br>
vmovdqa32 %xmm3, (%rdx)<br>
addq $1, %rax<br>
addq $64, %rdx<br>
cmpq %rcx, %rax<br>
jl .LBB0_1<br>
retq<br>
.Lfunc_end0:<br>
.size main, .Lfunc_end0-main<br>
.cfi_endproc<br>
<br>
<br>
.section ".note.GNU-stack","",@progbits<br>
<br>
end assembly!<br>
<br>
I am not sure which instruction is the offending one, but the
'vmovdqa32' looks like AVX512.<br>
<br>
I wasn't able to reproduce this with 'opt' - it generates
avx2 instructions. And when I force it to use e.g. avx512f
it rejects the CPU type.<br>
<br>
Any ideas?<br>
<br>
<br>
Frank<br>
_______________________________________________<br>
LLVM Developers mailing list<br>
<a moz-do-not-send="true"
href="mailto:llvm-dev@lists.llvm.org" target="_blank">llvm-dev@lists.llvm.org</a><br>
<a moz-do-not-send="true"
href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev"
rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev</a><br>
</blockquote>
</div>
<br>
<br clear="all">
<div><br>
</div>
-- <br>
<div class="gmail_signature" data-smartmail="gmail_signature">~Craig</div>
</div>
</blockquote>
<br>
</body>
</html>