[llvm-bugs] [Bug 41304] New: [SLPVectorizer] reorderInputsAccordingToOpcode - Match extractelement source vectors

via llvm-bugs llvm-bugs at lists.llvm.org
Fri Mar 29 08:18:54 PDT 2019


https://bugs.llvm.org/show_bug.cgi?id=41304

            Bug ID: 41304
           Summary: [SLPVectorizer] reorderInputsAccordingToOpcode - Match
                    extractelement source vectors
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedbugs at nondot.org
          Reporter: llvm-dev at redking.me.uk
                CC: a.bataev at hotmail.com, dtemirbulatov at gmail.com,
                    llvm-bugs at lists.llvm.org, spatel+llvm at rotateright.com,
                    v.porpodas at gmail.com

https://godbolt.org/z/L_m3rG

; Reproducer input: a fully scalarized element-wise add of %a and %b.
; Lanes 0 and 3 are written as (a, b) while lanes 1 and 2 are written as
; (b, a); because integer add is commutative, every lane still computes
; a[i] + b[i].
define <4 x i32> @add_v4i32(<4 x i32> %a, <4 x i32> %b) {
  ; Extract all four lanes of %a.
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  ; Extract all four lanes of %b.
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  ; Per-lane adds; the middle two have their operands swapped on purpose.
  %c0 = add i32 %a0, %b0
  %c1 = add i32 %b1, %a1 ; commute me
  %c2 = add i32 %b2, %a2 ; commute me
  %c3 = add i32 %a3, %b3
  ; Rebuild the <4 x i32> result, placing each sum back in its own lane.
  %d0 = insertelement <4 x i32> undef, i32 %c0, i32 0
  %d1 = insertelement <4 x i32>   %d0, i32 %c1, i32 1
  %d2 = insertelement <4 x i32>   %d1, i32 %c2, i32 2
  %d3 = insertelement <4 x i32>   %d2, i32 %c3, i32 3
  ret <4 x i32> %d3
}

reorderInputsAccordingToOpcode misses the chance to commute the operands of the
middle two adds because it does not check whether the extractelement operands
come from the same source vector.

trunk:

; Output reported for trunk: the operands are vectorized in the order they
; were written, so each side of the add becomes a blend of %a and %b.
define <4 x i32> @add_v4i32(<4 x i32> %a, <4 x i32> %b) {
  ; Mask indices 0-3 select from the first operand and 4-7 from the second,
  ; so %1 = <a0, b1, b2, a3>.
  %1 = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32
2, i32 7>
  ; Same mask with the operands swapped, so %2 = <b0, a1, a2, b3>.
  %2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32
2, i32 7>
  ; Each lane of the sum pairs a[i] with b[i], in one order or the other.
  %3 = add <4 x i32> %1, %2
  ret <4 x i32> %3
}

when it could be:

; Desired output: a single vector add applied directly to %a and %b, with
; no shufflevector instructions.
define <4 x i32> @add_v4i32(<4 x i32> %a, <4 x i32> %b) {
  %1 = add <4 x i32> %a, %b
  ret <4 x i32> %1
}

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190329/60400f4f/attachment.html>


More information about the llvm-bugs mailing list