[llvm-dev] AVX512 instruction generated when JIT compiling for an avx2 architecture
Frank Winter via llvm-dev
llvm-dev at lists.llvm.org
Thu Jun 23 09:53:25 PDT 2016
With LLVM 3.8, the JIT compiler engine generates an AVX512 instruction
although I target an 'avx2' CPU (Intel Core i7).
I just downloaded the most recent 3.8 release and it still happens.
It happens with this input module:
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @module_cFFEMJ(i64 %lo, i64 %hi, i64 %myId, i1 %ordered, i64
%start, i32* noalias align 32 %arg0, i32* noalias align 32 %arg1) {
entrypoint:
%0 = add nsw i64 %lo, %start
%1 = add nsw i64 %hi, %start
%2 = select i1 %ordered, i64 %0, i64 %lo
%3 = select i1 %ordered, i64 %1, i64 %hi
%4 = sdiv i64 %2, 4
%5 = sdiv i64 %3, 4
%6 = bitcast i32* %arg1 to i64*
%7 = load i64, i64* %6, align 32
%8 = trunc i64 %7 to i32
%9 = getelementptr i32, i32* %arg1, i64 1
%10 = lshr i64 %7, 32
%11 = trunc i64 %10 to i32
%12 = getelementptr i32, i32* %arg1, i64 2
%13 = bitcast i32* %12 to i64*
%14 = load i64, i64* %13, align 8
%15 = trunc i64 %14 to i32
%16 = getelementptr i32, i32* %arg1, i64 3
%17 = lshr i64 %14, 32
%18 = trunc i64 %17 to i32
br label %L5
L5: ; preds = %L5, %entrypoint
%19 = phi i64 [ %32, %L5 ], [ %4, %entrypoint ]
%20 = shl i64 %19, 4
%21 = or i64 %20, 4
%22 = or i64 %20, 8
%23 = or i64 %20, 12
%broadcast.splatinsert9 = insertelement <4 x i32> undef, i32 %8, i32 0
%broadcast.splat10 = shufflevector <4 x i32> %broadcast.splatinsert9,
<4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert11 = insertelement <4 x i32> undef, i32 %11, i32 0
%broadcast.splat12 = shufflevector <4 x i32>
%broadcast.splatinsert11, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert13 = insertelement <4 x i32> undef, i32 %15, i32 0
%broadcast.splat14 = shufflevector <4 x i32>
%broadcast.splatinsert13, <4 x i32> undef, <4 x i32> zeroinitializer
%broadcast.splatinsert15 = insertelement <4 x i32> undef, i32 %18, i32 0
%broadcast.splat16 = shufflevector <4 x i32>
%broadcast.splatinsert15, <4 x i32> undef, <4 x i32> zeroinitializer
%24 = getelementptr i32, i32* %arg0, i64 %20
%25 = bitcast i32* %24 to <4 x i32>*
store <4 x i32> %broadcast.splat10, <4 x i32>* %25, align 16
%26 = getelementptr i32, i32* %arg0, i64 %21
%27 = bitcast i32* %26 to <4 x i32>*
store <4 x i32> %broadcast.splat12, <4 x i32>* %27, align 16
%28 = getelementptr i32, i32* %arg0, i64 %22
%29 = bitcast i32* %28 to <4 x i32>*
store <4 x i32> %broadcast.splat14, <4 x i32>* %29, align 16
%30 = getelementptr i32, i32* %arg0, i64 %23
%31 = bitcast i32* %30 to <4 x i32>*
store <4 x i32> %broadcast.splat16, <4 x i32>* %31, align 16
%32 = add nsw i64 %19, 1
%33 = icmp slt i64 %32, %5
br i1 %33, label %L5, label %L6
L6: ; preds = %L5
ret void
}
The following lines of code show how I call the JIT compiler ('Mod'
points to the module).
llvm::EngineBuilder
engineBuilder(std::move(std::unique_ptr<llvm::Module>(Mod)));
engineBuilder.setMCPU(llvm::sys::getHostCPUName());
engineBuilder.setEngineKind(llvm::EngineKind::JIT);
engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive);
engineBuilder.setErrorStr(&mcjit_error);
llvm::TargetOptions targetOptions;
targetOptions.AllowFPOpFusion = llvm::FPOpFusion::Fast;
engineBuilder.setTargetOptions( targetOptions );
TheExecutionEngine = engineBuilder.create();
targetMachine = engineBuilder.selectTarget();
Mod->setDataLayout( targetMachine->createDataLayout() );
TheExecutionEngine->finalizeObject(); // MCJIT
fptr_mainFunc_extern = TheExecutionEngine->getPointerToFunction(
mainFunc_extern );
When the function is called, an 'illegal instruction' is raised.
Looking at the generated assembly reveals an AVX512 instruction that
shouldn't be there.
Assembly:
.text
.file "module"
.globl main
.align 16, 0x90
.type main,@function
main:
.cfi_startproc
movq 8(%rsp), %r10
leaq (%rdi,%r8), %rdx
addq %rsi, %r8
testb $1, %cl
cmoveq %rdi, %rdx
cmoveq %rsi, %r8
movq %rdx, %rax
sarq $63, %rax
shrq $62, %rax
addq %rdx, %rax
sarq $2, %rax
movq %r8, %rcx
sarq $63, %rcx
shrq $62, %rcx
addq %r8, %rcx
sarq $2, %rcx
movq (%r10), %r8
movq 8(%r10), %r10
movq %r8, %rdi
shrq $32, %rdi
movq %r10, %rsi
shrq $32, %rsi
movq %rax, %rdx
shlq $6, %rdx
leaq 48(%rdx,%r9), %rdx
.align 16, 0x90
.LBB0_1:
vmovd %r8d, %xmm0
vpbroadcastd %xmm0, %xmm0
vmovd %edi, %xmm1
vpbroadcastd %xmm1, %xmm1
vmovd %r10d, %xmm2
vpbroadcastd %xmm2, %xmm2
vmovd %esi, %xmm3
vpbroadcastd %xmm3, %xmm3
vmovdqa32 %xmm0, -48(%rdx)
vmovdqa32 %xmm1, -32(%rdx)
vmovdqa32 %xmm2, -16(%rdx)
vmovdqa32 %xmm3, (%rdx)
addq $1, %rax
addq $64, %rdx
cmpq %rcx, %rax
jl .LBB0_1
retq
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
.section ".note.GNU-stack","",@progbits
end assembly!
I am not sure which instruction is the offending one, but 'vmovdqa32'
looks like an AVX512 instruction.
I wasn't able to reproduce this with 'opt'; it generates AVX2
instructions. And when I force it to use, e.g., avx512f, it rejects the
CPU type.
Any ideas?
Frank
More information about the llvm-dev
mailing list