[llvm-bugs] [Bug 33434] New: 256-bit double shuffles not optimal

Tue Jun 13 03:10:43 PDT 2017

https://bugs.llvm.org/show_bug.cgi?id=33434

            Bug ID: 33434
           Summary: 256-bit double shuffles not optimal
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: tobias at grosser.es
                CC: llvm-bugs at lists.llvm.org

Hi,

I just tried to generate AVX2 code for some 256 bit AVX2 double shuffles, but
despite Chandler's outstanding work on improving X86 shuffles two years ago,
the shuffle sequences do not seem to be optimal (using llc out.ll -o -
-mcpu=x86-64 -mattr=+avx2 on r304555).

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"                     

; test_0: load two <4 x double> vectors from %PA and %PB, build two
; two-input shuffles of them, and store the results back to %PA and %PB.
define void @test_0(<4 x double>* %PA, <4 x double>* %PB) {
entry:
  %A = load <4 x double>, <4 x double>* %PA
  %B = load <4 x double>, <4 x double>* %PB

  ; shufflevector mask indices 0-3 select lanes of %A, 4-7 select lanes of %B:
  ;   %SA = A[0], B[0], A[2], A[3]
  ;   %SB = A[1], B[1], B[2], B[3]
  %SA = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  %SB = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 1, i32 5, i32 6, i32 7>

; Shuffle code quoted in the report for this function
; (llc -mcpu=x86-64 -mattr=+avx2, r304555):
;       vmovddup        %xmm1, %xmm2    # xmm2 = xmm1[0,0]
;       vblendpd        $2, %ymm2, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm2[1],ymm0[2,3]
;       vpermilpd       $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0]
;       vblendpd        $1, %ymm0, %ymm1, %ymm0 # ymm0 = ymm0[0],ymm1[1,2,3]

  store <4 x double> %SA, <4 x double>* %PA
  store <4 x double> %SB, <4 x double>* %PB
  ret void
}

; test_1: load two <4 x double> vectors from %PA and %PB, build two
; two-input shuffles of them, and store the results back to %PA and %PB.
define void @test_1(<4 x double>* %PA, <4 x double>* %PB) {
entry:
  %A = load <4 x double>, <4 x double>* %PA
  %B = load <4 x double>, <4 x double>* %PB

  ; shufflevector mask indices 0-3 select lanes of %A, 4-7 select lanes of %B:
  ;   %SA = B[0], B[1], A[0], B[2]
  ;   %SB = A[2], A[3], A[1], B[3]
  %SA = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 4, i32 5, i32 0, i32 6>
  %SB = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 2, i32 3, i32 1, i32 7>

; Shuffle code quoted in the report for this function
; (llc -mcpu=x86-64 -mattr=+avx2, r304555):
;       vinsertf128     $1, %xmm0, %ymm0, %ymm2
;       vpermilpd       $2, %ymm1, %ymm3 # ymm3 = ymm1[0,1,2,2]
;       vblendpd        $4, %ymm2, %ymm3, %ymm2 # ymm2 = ymm3[0,1],ymm2[2],ymm3[3]
;       vpermpd $222, %ymm0, %ymm0      # ymm0 = ymm0[2,3,1,3]
;       vblendpd        $8, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2],ymm1[3]

  store <4 x double> %SA, <4 x double>* %PA
  store <4 x double> %SB, <4 x double>* %PB
  ret void
}

; test_2: load two <4 x double> vectors from %PA and %PB, build two
; two-input shuffles of them, and store the results back to %PA and %PB.
define void @test_2(<4 x double>* %PA, <4 x double>* %PB) {                     
entry:                                                                          
  %A = load <4 x double>, <4 x double>* %PA                                     
  %B = load <4 x double>, <4 x double>* %PB                                     
  ; shufflevector mask indices 0-3 select lanes of %A, 4-7 select lanes of %B:
  ;   %SA = A[0], A[1], B[0], B[1]   (low 128-bit halves of %A and %B)
  ;   %SB = A[2], A[3], B[2], B[3]   (high 128-bit halves of %A and %B)
  %SA = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 0, i32
1, i32 4, i32 5>
  %SB = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 2, i32
3, i32 6, i32 7>

; Shuffle code quoted in the report for this function
; (llc -mcpu=x86-64 -mattr=+avx2, r304555):
;       vinsertf128     $1, %xmm1, %ymm0, %ymm2
;       vperm2f128      $49, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]

  store <4 x double> %SA, <4 x double>* %PA                                     
  store <4 x double> %SB, <4 x double>* %PB                                     
  ret void                                                                      
}

Am I missing something or could these really be translated to at most two
vblendpd instructions?

Best,
Tobias

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170613/335bb472/attachment.html>