[llvm-dev] avx512 JIT backend generates wrong code on <4 x float>
Frank Winter via llvm-dev
llvm-dev at lists.llvm.org
Wed Jun 29 12:41:39 PDT 2016
Hi!
When compiling the attached module with the JIT engine on an Intel KNL I
see wrong code getting emitted. I attach a complete exploit program
which shows the bug in LLVM 3.8. It loads and JIT compiles the module
and prints the assembly. I stumbled on this because the result of an
actual calculation was wrong, so it is not only the printed assembly
text that is wrong; the generated machine code is wrong as well.
When I execute the exploit program on an Intel KNL the following output
is produced:
CPU name = knl
-sse4a,-avx512bw,cx16,-tbm,xsave,-fma4,-avx512vl,prfchw,bmi2,adx,-xsavec,fsgsbase,avx,avx512cd,avx512pf,-rtm,popcnt,fma,bmi,aes,rdrnd,-xsaves,sse4.1,sse4.2,avx2,avx512er,sse,lzcnt,pclmul,avx512f,f16c,ssse3,mmx,-pku,cmov,-xop,rdseed,movbe,-hle,xsaveopt,-sha,sse2,sse3,-avx512dq,
Assembly:
#-----------------------------------------------------------------------
# adjmul -- listing printed by the reproducer (LLVM 3.8 JIT, CPU "knl").
# Syntax: AT&T / GAS, x86-64.
#
# Signature, from the IR module attached to this post:
#   void adjmul(i64 lo, i64 hi, i64 myId, i1 ordered, i64 start,
#               float *arg0, float *arg1, float *arg2)
#
# Register use as seen in this listing (matches System V AMD64):
#   In:    rdi = lo, rsi = hi, rdx = myId (never read, overwritten),
#          cl  = ordered, r8 = start, r9 = arg0,
#          8(%rsp) = arg1, 16(%rsp) = arg2
#   Clobb: rax, rcx, rdx, rsi, rdi, r8, xmm0-xmm4, flags
#
# NOTE: the list archive had rendered '@' as ' at ' in the .type and
#       .section directives; '@function' / '@progbits' are restored here.
# NOTE: the instruction stream itself is left exactly as emitted. The
#       scalar FMA forms in the loop are the miscompile being reported.
#-----------------------------------------------------------------------
        .text
        .file   "module_KFxOBX_i4_after.ll"
        .globl  adjmul
        .align  16, 0x90
        .type   adjmul,@function
adjmul:
        .cfi_startproc
        # Select the loop bounds: (lo+start, hi+start) if ordered, else (lo, hi).
        leaq    (%rdi,%r8), %rdx            # rdx = lo + start
        addq    %rsi, %r8                   # r8  = hi + start
        testb   $1, %cl                     # ZF = 1 when ordered == 0
        cmoveq  %rdi, %rdx                  # !ordered -> rdx = lo
        cmoveq  %rsi, %r8                   # !ordered -> r8  = hi

        # rax = rdx / 4, signed, rounding toward zero (IR: sdiv ..., 4).
        movq    %rdx, %rax
        sarq    $63, %rax                   # all ones if negative, else 0
        shrq    $62, %rax                   # bias = 3 if negative, else 0
        addq    %rdx, %rax
        sarq    $2, %rax                    # rax = first loop index

        # rcx = r8 / 4, same signed-division sequence.
        movq    %r8, %rcx
        sarq    $63, %rcx
        shrq    $62, %rcx
        addq    %r8, %rcx
        sarq    $2, %rcx                    # rcx = loop end (exclusive)

        # Each index covers 8 floats = 32 bytes. All three pointers are set
        # to the SECOND 16-byte vector of the group; the first is at -16.
        movq    %rax, %rdx
        shlq    $5, %rdx                    # rdx = index * 32 (byte offset)
        leaq    16(%r9,%rdx), %rsi          # rsi = arg0 + offset + 16
        orq     $16, %rdx                   # low 5 bits are 0, so this is +16
        movq    16(%rsp), %rdi              # rdi = arg2
        addq    %rdx, %rdi                  # rdi = arg2 + offset + 16
        addq    8(%rsp), %rdx               # rdx = arg1 + offset + 16
        .align  16, 0x90
.LBB0_1:
        vmovaps -16(%rdx), %xmm0            # a0 = arg1 group, first  <4 x float>
        vmovaps (%rdx), %xmm1               # a1 = arg1 group, second <4 x float>
        vmovaps -16(%rdi), %xmm2            # b0 = arg2 group, first  <4 x float>
        vmovaps (%rdi), %xmm3               # b1 = arg2 group, second <4 x float>
        vmulps  %xmm3, %xmm1, %xmm4         # xmm4 = a1 * b1   (packed, correct)
        vmulps  %xmm2, %xmm1, %xmm1         # xmm1 = a1 * b0   (packed, correct)
        # BUG (subject of this report): the two FMAs below are the scalar
        # "ss" forms. Only lane 0 gets a0*b0 + a1*b1 and a0*b1 - a1*b0;
        # lanes 1..3 of xmm2/xmm3 still hold the loaded b0/b1 values.
        # The IR operates on <4 x float>, so packed "ps" forms were expected.
        vfmadd213ss %xmm4, %xmm0, %xmm2     # xmm2[0] = a0[0]*b0[0] + xmm4[0]
        vfmsub213ss %xmm1, %xmm0, %xmm3     # xmm3[0] = a0[0]*b1[0] - xmm1[0]
        vmovaps %xmm2, -16(%rsi)            # store first  result vector to arg0
        vmovaps %xmm3, (%rsi)               # store second result vector to arg0
        addq    $1, %rax                    # next index
        addq    $32, %rsi                   # advance all pointers by 8 floats
        addq    $32, %rdi
        addq    $32, %rdx
        cmpq    %rcx, %rax
        jl      .LBB0_1                     # signed: loop while index < end
        retq
.Lfunc_end0:
        .size   adjmul, .Lfunc_end0-adjmul
        .cfi_endproc

        .section        ".note.GNU-stack","",@progbits
end assembly!
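The instructions 'vfmadd213ss' and 'vfmsub213ss' are scalar ('Fused
Multiply-Add of Scalar Single-Precision Floating-Point'): they compute
only the lowest of the four lanes. Since the IR operates on <4 x float>,
these should be packed SIMD instructions (e.g. 'vfmadd213ps' and
'vfmsub213ps'). Note that the KNL has 16-wide float SIMD, while the
exploit module uses only 4-wide vectors. However, the backend should be
able to handle this.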
Unless I receive further ideas I will file an official bug report.
Frank
-------------- next part --------------
# Builds the reproducer program (main) against an LLVM 3.8 installation.
# Compiler and linker flags are taken from that installation's llvm-config.
#
# NOTE: every recipe line below must begin with a literal TAB character;
#       without it GNU make stops with "missing separator".

# Root of the LLVM installation; adjust for the local machine.
LLVMPATH=/home/fwinter/toolchain/install/llvm-3.8-recent
LLVMCONF=$(LLVMPATH)/bin/llvm-config

CXXFLAGS=$(shell $(LLVMCONF) --cxxflags)
# LLVM components the reproducer links against, plus libdl.
LIBS=$(shell $(LLVMCONF) --libs core mcjit native scalaropts vectorize irreader linker) -ldl
LDFLAGS=$(shell $(LLVMCONF) --ldflags)

CXX=g++
OBJS=main.o
TARGET=main

# 'all' and 'clean' name actions, not files.
.PHONY: all clean

all: $(TARGET)

# Link step.
$(TARGET): $(OBJS)
	$(CXX) -g -o $@ $^ $(LIBS) $(LDFLAGS)

# Compile each .cc into its .o with debug info.
%.o: %.cc
	$(CXX) $(CXXFLAGS) -c -g -o $@ $<

clean:
	rm -rf $(TARGET) $(OBJS) *~
-------------- next part --------------
; Reproducer module: a single function, @adjmul, working on <4 x float> vectors.
; The module sets only a data layout; no target triple is specified here.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; @adjmul(lo, hi, myId, ordered, start, arg0, arg1, arg2)
;
; For each index i, with a = arg1 and b = arg2 viewed as groups of 8 floats
; (two <4 x float> halves, a0/a1 and b0/b1), writes to arg0:
;     arg0[8i   .. 8i+3] = a0*b0 + a1*b1
;     arg0[8i+4 .. 8i+7] = a0*b1 - a1*b0
; NOTE(review): this has the shape of conj(a)*b on split real/imaginary
; halves -- presumably what "adj" refers to; confirm against the generator.
;
; Loop bounds: i runs from (lo [+ start]) / 4 while i + 1 < (hi [+ start]) / 4,
; where "+ start" is applied only when %ordered is true. The exit test is at
; the bottom of the loop, so the body always executes at least once.
; %myId is not referenced anywhere in the body.
; All three pointers are declared noalias and align 64; the vector loads and
; stores themselves only claim align 16.
define void @adjmul(i64 %lo, i64 %hi, i64 %myId, i1 %ordered, i64 %start, float* noalias align 64 %arg0, float* noalias align 64 %arg1, float* noalias align 64 %arg2) {
entrypoint:
; Candidate bounds shifted by %start, then chosen by %ordered.
%0 = add nsw i64 %lo, %start
%1 = add nsw i64 %hi, %start
%2 = select i1 %ordered, i64 %0, i64 %lo
%3 = select i1 %ordered, i64 %1, i64 %hi
; Signed division by 4 gives the first loop index (%4) and the end bound (%5).
%4 = sdiv i64 %2, 4
%5 = sdiv i64 %3, 4
br label %L5
L5: ; preds = %L5, %entrypoint
; %6 is the loop index i.
%6 = phi i64 [ %27, %L5 ], [ %4, %entrypoint ]
; %7 = 8*i is the float offset of the first half; %8 = %7 | 4 is the second
; half (equal to %7 + 4, because the low three bits of %7 are zero).
%7 = shl i64 %6, 3
%8 = or i64 %7, 4
; a0 = arg1[%7 .. %7+3]
%9 = getelementptr float, float* %arg1, i64 %7
%10 = bitcast float* %9 to <4 x float>*
%wide.load = load <4 x float>, <4 x float>* %10, align 16
; a1 = arg1[%8 .. %8+3]
%11 = getelementptr float, float* %arg1, i64 %8
%12 = bitcast float* %11 to <4 x float>*
%wide.load5 = load <4 x float>, <4 x float>* %12, align 16
; b0 = arg2[%7 .. %7+3]
%13 = getelementptr float, float* %arg2, i64 %7
%14 = bitcast float* %13 to <4 x float>*
%wide.load6 = load <4 x float>, <4 x float>* %14, align 16
; b1 = arg2[%8 .. %8+3]
%15 = getelementptr float, float* %arg2, i64 %8
%16 = bitcast float* %15 to <4 x float>*
%wide.load7 = load <4 x float>, <4 x float>* %16, align 16
; %19 = a0*b0 + a1*b1 -- a full 4-lane vector operation.
%17 = fmul <4 x float> %wide.load, %wide.load6
%18 = fmul <4 x float> %wide.load5, %wide.load7
%19 = fadd <4 x float> %17, %18
; %22 = a0*b1 - a1*b0 -- a full 4-lane vector operation.
%20 = fmul <4 x float> %wide.load, %wide.load7
%21 = fmul <4 x float> %wide.load5, %wide.load6
%22 = fsub <4 x float> %20, %21
; Store both result vectors into arg0 at the same offsets as the inputs.
%23 = getelementptr float, float* %arg0, i64 %7
%24 = bitcast float* %23 to <4 x float>*
store <4 x float> %19, <4 x float>* %24, align 16
%25 = getelementptr float, float* %arg0, i64 %8
%26 = bitcast float* %25 to <4 x float>*
store <4 x float> %22, <4 x float>* %26, align 16
; Advance the index and loop while the incremented index is below %5 (signed).
%27 = add nsw i64 %6, 1
%28 = icmp slt i64 %27, %5
br i1 %28, label %L5, label %L6
L6: ; preds = %L5
ret void
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: main.cc
Type: text/x-c++src
Size: 5059 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20160629/8008aa24/attachment.cc>
More information about the llvm-dev
mailing list