[LLVMdev] Is it a bug or am I missing something ?
Sebastien DELDON-GNB
sebastien.deldon at st.com
Tue Feb 19 01:52:28 PST 2013
Hi all,
Given the following code:
; ModuleID = 'shufxbug.ll'
; Reproducer: builds an <8 x float> whose lanes 1 and 2 both hold lane 0 of
; source[19] and whose other lanes are 0.0, then stores it to dest[19].
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
; sample_test(source, dest)
;   source: pointer indexed as an array of <4 x float>; only element 19 is read.
;   dest:   pointer indexed as an array of <8 x float>; only element 19 is written.
;   Returns nothing; the only visible side effect is the store to dest[19].
define void @sample_test(<4 x float>* nocapture %source, <8 x float>* nocapture %dest) nounwind noinline {
L.entry:
; %0 = &source[19]
%0 = getelementptr <4 x float>* %source, i32 19
; Load the whole 4-float vector (declared 16-byte aligned).
%1 = load <4 x float>* %0, align 16
; %2 = lane 0 of the loaded vector; lanes 1-3 are never used.
%2 = extractelement <4 x float> %1, i32 0
; Start from <0, undef, undef, 0, 0, 0, 0, 0> and write %2 into lane 2 ...
%3 = insertelement <8 x float> <float 0.000000e+00, float undef, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %2, i32 2
; ... then into lane 1, so no undef lane remains: <0, %2, %2, 0, 0, 0, 0, 0>.
%4 = insertelement <8 x float> %3, float %2, i32 1
; %5 = &dest[19]
%5 = getelementptr <8 x float>* %dest, i32 19
; Store all eight lanes; the store only promises 4-byte alignment.
store <8 x float> %4, <8 x float>* %5, align 4
ret void
}
I expect LLVM to generate code that stores a vector with the following value at dest[19]:
<float 0.000000e+00, float 'elem_0_of_source' , float 'elem_0_of_source', float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
When I run llc (trunk) as follows on a Core i7 machine, I get the following assembly code:
llc shufxbug.ll -march=x86 -relocation-model=pic -o shufxbug.s
# llc output for @sample_test (i386, SSE, AT&T syntax: op src, dst).
# Arguments are read from the stack on entry: 4(%esp) = source, 8(%esp) = dest.
# Byte offsets: source[19] = 19*16 = 304; dest[19] = 19*32 = 608, upper half at 624.
# The mail archive had rewritten ",@function" / ",@progbits" as ", at ...";
# the '@' forms are restored here so the listing assembles again.
        .file   "shufxbug.ll"
        .text
        .globl  sample_test
        .align  16, 0x90
        .type   sample_test,@function
sample_test:                            # @sample_test
# BB#0:                                 # %L.entry
        movl    4(%esp), %eax           # eax = source
        movss   304(%eax), %xmm0        # xmm0 = [source[19][0], 0, 0, 0]; a movss load zeroes lanes 1-3
        xorps   %xmm1, %xmm1            # xmm1 = all zeros
        movl    8(%esp), %eax           # eax = dest
        movups  %xmm1, 624(%eax)        # lanes 4-7 of dest[19] = 0.0
        pshufd  $65, %xmm0, %xmm0       # xmm0 = xmm0[1,0,0,1]
# NOTE(review): because movss cleared lanes 1-3, the shuffle above yields
# [0, e0, e0, 0] (e0 = source[19][0]), i.e. lanes 0 and 3 are 0.0 here.
        movdqu  %xmm0, 608(%eax)        # lanes 0-3 of dest[19]
        ret
.Ltmp0:
        .size   sample_test, .Ltmp0-sample_test
        .section        ".note.GNU-stack","",@progbits
It seems to me that this sequence of instructions builds the vector:
<float 'elem_1_of_source', float 'elem_0_of_source' , float 'elem_0_of_source', float 'elem_1_of_source', float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
On a Sandy Bridge system, I get similar behavior with slightly different code (using AVX):
# AVX variant as posted (AT&T syntax). The function label and the directives
# that precede the prologue are not part of this excerpt.
# Standard frame-pointer prologue.
pushl %ebp
.Ltmp5:
.cfi_def_cfa_offset 8
.Ltmp6:
.cfi_offset %ebp, -8
movl %esp, %ebp
.Ltmp7:
.cfi_def_cfa_register %ebp
# NOTE(review): with this frame, the two arguments of a (source, dest)
# function would normally sit at 8(%ebp) and 12(%ebp). The 12/16 offsets and
# the shufxbug.cl .loc lines suggest this listing was built from a variant
# with an extra leading argument -- confirm. Presumably eax = source here.
movl 12(%ebp), %eax
.loc 1 9 0 prologue_end # shufxbug.cl:9:0
.Ltmp8:
# Reads all four floats at 304(%eax) and permutes them, so lanes 0 and 3
# receive memory element 1 rather than 0.0 (no scalar load zeroes them first).
vpermilps $65, 304(%eax), %xmm0 # xmm0 = mem[1,0,0,1]
# xmm1 = all zeros.
vxorps %xmm1, %xmm1, %xmm1
# Upper 128 bits of ymm0 = xmm1 (zeros); lower 128 bits keep the permuted value.
vinsertf128 $1, %xmm1, %ymm0, %ymm0
# Presumably eax = dest (see the note on argument offsets above).
movl 16(%ebp), %eax
.loc 1 10 0 # shufxbug.cl:10:0
# Unaligned 32-byte store at byte offset 608 = 19*32.
vmovups %ymm0, 608(%eax)
.loc 1 11 0 # shufxbug.cl:11:0
popl %ebp
# Clear the upper halves of the ymm registers before returning.
vzeroupper
ret
It seems to me that the generated code is incorrect in both cases. Can someone confirm this, or point out what I did wrong if it is not a bug?
If this turns out to be an actual bug, I'll file it in the bug tracker.
Thanks
Seb
More information about the llvm-dev
mailing list