[LLVMdev] Is it a bug or am I missing something ?
David Tweed
david.tweed at arm.com
Tue Feb 19 02:08:18 PST 2013
<<<<<<<<<<<<<<<<<<<<<<<<<<
; ModuleID = 'shufxbug.ll'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-pc-linux-gnu"
define void @sample_test(<4 x float>* nocapture %source, <8 x float>*
nocapture %dest) nounwind noinline {
L.entry:
%0 = getelementptr <4 x float>* %source, i32 19
%1 = load <4 x float>* %0, align 16
%2 = extractelement <4 x float> %1, i32 0
%3 = insertelement <8 x float> <float 0.000000e+00, float undef, float
undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float
0.000000e+00, float 0.000000e+00>, float %2, i32 2
%4 = insertelement <8 x float> %3, float %2, i32 1
%5 = getelementptr <8 x float>* %dest, i32 19
store <8 x float> %4, <8 x float>* %5, align 4
ret void
}
I'm expecting LLVM to generate code so that a vector is stored at dest[19]
with the following value:
<float 0.000000e+00, float 'elem_0_of_source' , float 'elem_0_of_source',
float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float
0.000000e+00, float 0.000000e+00>
When I use llc trunk as follows on a Core i7 machine, I get the following
assembly code:
llc shufxbug.ll -march=x86 -relocation-model=pic -o shufxbug.s
.file "shufxbug.ll"
.text
.globl sample_test
.align 16, 0x90
.type sample_test,@function
sample_test: # @sample_test
# BB#0: # %L.entry
movl 4(%esp), %eax
movss 304(%eax), %xmm0
xorps %xmm1, %xmm1
movl 8(%esp), %eax
movups %xmm1, 624(%eax)
pshufd $65, %xmm0, %xmm0 # xmm0 = xmm0[1,0,0,1]
movdqu %xmm0, 608(%eax)
ret
.Ltmp0:
.size sample_test, .Ltmp0-sample_test
.section ".note.GNU-stack","",@progbits
It seems to me that this sequence of instructions is building the vector:
<float 'elem_1_of_source', float 'elem_0_of_source' , float
'elem_0_of_source', float 'elem_1_of_source', float 0.000000e+00, float
0.000000e+00, float 0.000000e+00, float 0.000000e+00>
<<<<<<<<<<<<<<<<<<<<<<<<
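Umm, isn't the movss (not movps) instruction just loading the bottom lane of
the XMM register, so that IF (presumably due to calling convention) we know
xmm0 is already zeroed this is constructing what's desired? If you print the
output, is it actually not what the IR leads you to expect?
[Editorial note: no calling-convention assumption is needed. The memory-load
form of movss zeroes the upper three lanes of the destination, so xmm0 holds
<elem_0, 0, 0, 0>, and pshufd $65 (selectors [1,0,0,1]) then produces
<0, elem_0, elem_0, 0>, which matches what the IR asks for.]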
Cheers,
Dave
More information about the llvm-dev
mailing list