[llvm-bugs] [Bug 34666] New: improvement for several broadcast

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Sep 19 04:44:17 PDT 2017


https://bugs.llvm.org/show_bug.cgi?id=34666

            Bug ID: 34666
           Summary: improvement for several broadcast
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedbugs at nondot.org
          Reporter: Jina.nahias at intel.com
                CC: llvm-bugs at lists.llvm.org

For the following C example:
__m512i foo(__m512i __O, __mmask64 __M, char __A){
__m512i res= _mm512_mask_set1_epi8(__O, -1, __A);
__m512i res1= _mm512_mask_set1_epi8(__O, __M, __A);
__m512i res2= _mm512_maskz_set1_epi8( __M, __A);
__m512i res3= res+res1+res2;
return res3;
} 

LLVM currently generates the following IR:
define <8 x i64> @foo(<8 x i64> %__O, i64 %__M, i8 signext %__A) {
  %vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0
  %vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x
i32> zeroinitializer
  %0 = bitcast <64 x i8> %vecinit63.i.i to <8 x i64>
  %1 = bitcast <8 x i64> %__O to <64 x i8>
  %2 = bitcast i64 %__M to <64 x i1>
  %3 = select <64 x i1> %2, <64 x i8> %vecinit63.i.i, <64 x i8> %1
  %4 = bitcast <64 x i8> %3 to <8 x i64>
  %5 = select <64 x i1> %2, <64 x i8> %vecinit63.i.i, <64 x i8> zeroinitializer
  %6 = bitcast <64 x i8> %5 to <8 x i64>
  %add = add <8 x i64> %4, %0
  %add3 = add <8 x i64> %add, %6
  ret <8 x i64> %add3
} 

which gets translated to this assembly:
vpbroadcastb    %esi, %zmm1
kmovq   %rdi, %k1
vpbroadcastb    %esi, %zmm0 {%k1}
vpaddq  %zmm1, %zmm0, %zmm0
vpbroadcastb    %esi, %zmm1 {%k1} {z}
vpaddq  %zmm1, %zmm0, %zmm0
retq

If we could instead generate the following IR:
define <64 x i8> @foo(i8 %a, <64 x i8> %passthru, i64 %mask) {
   %m = bitcast i64 %mask to <64 x i1>
   %add0 = add i8 %a, %a
   %add1 = add i8 %add0, %add0
   %v0 = insertelement <64 x i8> undef, i8 %add1, i32 0
   %v1 = shufflevector <64 x i8> %v0, <64 x i8> undef, <64 x i32>
zeroinitializer
   %w0 = insertelement <64 x i8> undef, i8 %a, i32 0
   %w1 = shufflevector <64 x i8> %w0, <64 x i8> undef, <64 x i32>
zeroinitializer
   %add2 = add <64 x i8> %w1, %passthru
   %v2 = select <64 x i1> %m, <64 x i8> %v1, <64 x i8> %add2
   ret <64 x i8> %v2
 }

we would get this assembly:
kmovq   %rsi, %k1
        vpbroadcastb    %edi, %zmm1
        shlb    $2, %dil
        vpaddb  %zmm0, %zmm1, %zmm0
        vpbroadcastb    %edi, %zmm0 {%k1}

which saves one broadcast instruction (two vpbroadcastb instead of three).

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170919/657316a5/attachment-0001.html>


More information about the llvm-bugs mailing list