[LLVMdev] Can LLVM vectorize <2 x i32> type

suyog sarda sardask01 at gmail.com
Fri Jun 26 11:56:04 PDT 2015


> For example, I have the following IR code,
>
> for.cond.preheader:                               ; preds = %if.end18
>   %mul = mul i32 %12, %3
>   %cmp21128 = icmp sgt i32 %mul, 0
>   br i1 %cmp21128, label %for.body.preheader, label %return
>
> for.body.preheader:                               ; preds = %for.cond.preheader
>   %19 = mul i32 %12, %3
>   %20 = add i32 %19, -1
>   %21 = zext i32 %20 to i64
>   %22 = add i64 %21, 1
>   %end.idx = add i64 %21, 1
>   %n.vec = and i64 %22, 8589934584
>   %cmp.zero = icmp eq i64 %n.vec, 0
>   br i1 %cmp.zero, label %middle.block, label %vector.ph
>
> The corresponding assembly code is:
> # BB#3:                                 # %for.cond.preheader
>     imull   %r9d, %ebx
>     testl   %ebx, %ebx
>     jle .LBB10_63
> # BB#4:                                 # %for.body.preheader
>     leal    -1(%rbx), %eax
>     incq    %rax
>     xorl    %edx, %edx
>     movabsq $8589934584, %rcx       # imm = 0x1FFFFFFF8
>     andq    %rax, %rcx
>     je  .LBB10_8
>
> I changed all the scalar operands to <2 x ValueType> ones. The IR becomes the following:
> for.cond.preheader:                               ; preds = %if.end18
>   %mulS44_D = mul <2 x i32> %splatLDS24_D.splat, %splatLDS7_D.splat
>   %cmp21128S45_D = icmp sgt <2 x i32> %mulS44_D, zeroinitializer
>   %sextS46_D = sext <2 x i1> %cmp21128S45_D to <2 x i64>
>   %BCS46_D = bitcast <2 x i64> %sextS46_D to i128
>   %mskS46_D = icmp ne i128 %BCS46_D, 0
>   br i1 %mskS46_D, label %for.body.preheader, label %return
>
> for.body.preheader:                               ; preds = %for.cond.preheader
>   %S47_D = mul <2 x i32> %splatLDS24_D.splat, %splatLDS7_D.splat
>   %S48_D = add <2 x i32> %S47_D, <i32 -1, i32 -1>
>   %S49_D = zext <2 x i32> %S48_D to <2 x i64>
>   %S50_D = add <2 x i64> %S49_D, <i64 1, i64 1>
>   %end.idxS51_D = add <2 x i64> %S49_D, <i64 1, i64 1>
>   %n.vecS52_D = and <2 x i64> %S50_D, <i64 8589934584, i64 8589934584>
>   %cmp.zeroS53_D = icmp eq <2 x i64> %n.vecS52_D, zeroinitializer
>   %sextS54_D = sext <2 x i1> %cmp.zeroS53_D to <2 x i64>
>   %BCS54_D = bitcast <2 x i64> %sextS54_D to i128
>   %mskS54_D = icmp ne i128 %BCS54_D, 0
>   br i1 %mskS54_D, label %middle.block, label %vector.ph
>
> Now the assembly for the above IR code is:
> # BB#4:                                 # %for.cond.preheader
>     vmovdqa 144(%rsp), %xmm0        # 16-byte Reload
>     vpmuludq    %xmm7, %xmm0, %xmm2
>     vpsrlq  $32, %xmm7, %xmm4
>     vpmuludq    %xmm4, %xmm0, %xmm4
>     vpsllq  $32, %xmm4, %xmm4
>     vpaddq  %xmm4, %xmm2, %xmm2
>     vpsrlq  $32, %xmm0, %xmm4
>     vpmuludq    %xmm7, %xmm4, %xmm4
>     vpsllq  $32, %xmm4, %xmm4
>     vpaddq  %xmm4, %xmm2, %xmm2
>     vpextrq $1, %xmm2, %rax
>     cltq
>     vmovq   %rax, %xmm4
>     vmovq   %xmm2, %rax
>     cltq
>     vmovq   %rax, %xmm5
>     vpunpcklqdq %xmm4, %xmm5, %xmm4 # xmm4 = xmm5[0],xmm4[0]
>     vpcmpgtq    %xmm3, %xmm4, %xmm3
>     vptest  %xmm3, %xmm3
>     je  .LBB10_66
> # BB#5:                                 # %for.body.preheader
>     vpaddq  %xmm15, %xmm2, %xmm3
>     vpand   %xmm15, %xmm3, %xmm3
>     vpaddq  .LCPI10_1(%rip), %xmm3, %xmm8
>     vpand   .LCPI10_5(%rip), %xmm8, %xmm5
>     vpxor   %xmm4, %xmm4, %xmm4
>     vpcmpeqq    %xmm4, %xmm5, %xmm6
>     vptest  %xmm6, %xmm6
>     jne .LBB10_9
>

As Mats pointed out, this may be the same problem as:
https://llvm.org/bugs/show_bug.cgi?id=22703

Basically, the code is generated for AVX2, where the XMM registers are 128 bits
wide. Some of the above ops operate on <2 x i32> and involve a sext to <2 x i64>,
a bitcast, etc. Hence the generated code has extra vector instructions.

Regards,
Suyog Sarda
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20150627/e3c54be6/attachment.html>


More information about the llvm-dev mailing list