[llvm] r316274 - [X86][SSE] Add extractps/pextrd equivalence to domain tables

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 21 13:19:48 PDT 2017


Author: rksimon
Date: Sat Oct 21 13:19:48 2017
New Revision: 316274

URL: http://llvm.org/viewvc/llvm-project?rev=316274&view=rev
Log:
[X86][SSE] Add extractps/pextrd equivalence to domain tables

Differential Revision: https://reviews.llvm.org/D39135

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
    llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
    llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/extract-store.ll
    llvm/trunk/test/CodeGen/X86/extractelement-index.ll
    llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
    llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
    llvm/trunk/test/CodeGen/X86/oddshuffles.ll
    llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
    llvm/trunk/test/CodeGen/X86/sse41.ll
    llvm/trunk/test/CodeGen/X86/widen_load-3.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Oct 21 13:19:48 2017
@@ -9451,6 +9451,8 @@ static const uint16_t ReplaceableInstrs[
   { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr },
   { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm },
   { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr },
+  { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr },
+  { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr },
   // AVX 128-bit support
   { X86::VMOVAPSmr,  X86::VMOVAPDmr,  X86::VMOVDQAmr  },
   { X86::VMOVAPSrm,  X86::VMOVAPDrm,  X86::VMOVDQArm  },
@@ -9479,6 +9481,8 @@ static const uint16_t ReplaceableInstrs[
   { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr },
   { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm },
   { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr },
+  { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr },
+  { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr },
   // AVX 256-bit support
   { X86::VMOVAPSYmr,   X86::VMOVAPDYmr,   X86::VMOVDQAYmr  },
   { X86::VMOVAPSYrm,   X86::VMOVAPDYrm,   X86::VMOVDQAYrm  },
@@ -9577,6 +9581,8 @@ static const uint16_t ReplaceableInstrs[
   { X86::VUNPCKLPSZrr,       X86::VUNPCKLPSZrr,       X86::VPUNPCKLDQZrr },
   { X86::VUNPCKHPSZrm,       X86::VUNPCKHPSZrm,       X86::VPUNPCKHDQZrm },
   { X86::VUNPCKHPSZrr,       X86::VUNPCKHPSZrr,       X86::VPUNPCKHDQZrr },
+  { X86::VEXTRACTPSZmr,      X86::VEXTRACTPSZmr,      X86::VPEXTRDZmr },
+  { X86::VEXTRACTPSZrr,      X86::VEXTRACTPSZrr,      X86::VPEXTRDZrr },
 };
 
 static const uint16_t ReplaceableInstrsAVX2[][3] = {

Modified: llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll Sat Oct 21 13:19:48 2017
@@ -49,9 +49,9 @@ entry:
 define void @zero_test() {
 ; X32-LABEL: zero_test:
 ; X32:       # BB#0: # %entry
-; X32-NEXT:    pxor %xmm0, %xmm0
-; X32-NEXT:    pextrd $1, %xmm0, (%eax)
-; X32-NEXT:    movd %xmm0, (%eax)
+; X32-NEXT:    xorps %xmm0, %xmm0
+; X32-NEXT:    extractps $1, %xmm0, (%eax)
+; X32-NEXT:    movss %xmm0, (%eax)
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: zero_test:

Modified: llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll Sat Oct 21 13:19:48 2017
@@ -10,12 +10,12 @@
 define <4 x i32> @test(<4 x i32>* %p) {
 ; CHECK-LABEL: test:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movdqa (%rdi), %xmm0
-; CHECK-NEXT:    pextrd $2, %xmm0, %eax
+; CHECK-NEXT:    movaps (%rdi), %xmm0
+; CHECK-NEXT:    extractps $2, %xmm0, %eax
 ; CHECK-NEXT:    cmpl $3, %eax
 ; CHECK-NEXT:    je .LBB0_2
 ; CHECK-NEXT:  # BB#1:
-; CHECK-NEXT:    pxor %xmm0, %xmm0
+; CHECK-NEXT:    xorps %xmm0, %xmm0
 ; CHECK-NEXT:  .LBB0_2:
 ; CHECK-NEXT:    retq
   %v = load <4 x i32>, <4 x i32>* %p

Modified: llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-intrinsics-fast-isel.ll Sat Oct 21 13:19:48 2017
@@ -792,14 +792,14 @@ define i32 @test_mm256_extract_epi32(<4
 ; X32-LABEL: test_mm256_extract_epi32:
 ; X32:       # BB#0:
 ; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT:    vpextrd $1, %xmm0, %eax
+; X32-NEXT:    vextractps $1, %xmm0, %eax
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_extract_epi32:
 ; X64:       # BB#0:
 ; X64-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X64-NEXT:    vpextrd $1, %xmm0, %eax
+; X64-NEXT:    vextractps $1, %xmm0, %eax
 ; X64-NEXT:    vzeroupper
 ; X64-NEXT:    retq
   %arg0 = bitcast <4 x i64> %a0 to <8 x i32>
@@ -811,8 +811,8 @@ define i64 @test_mm256_extract_epi64(<4
 ; X32-LABEL: test_mm256_extract_epi64:
 ; X32:       # BB#0:
 ; X32-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-NEXT:    vpextrd $2, %xmm0, %eax
-; X32-NEXT:    vpextrd $3, %xmm0, %edx
+; X32-NEXT:    vextractps $2, %xmm0, %eax
+; X32-NEXT:    vextractps $3, %xmm0, %edx
 ; X32-NEXT:    vzeroupper
 ; X32-NEXT:    retl
 ;

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Sat Oct 21 13:19:48 2017
@@ -424,9 +424,9 @@ define i64 @extract_v2i64(<2 x i64> %x,
 define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
 ; CHECK-LABEL: extract_v16i32:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpextrd $1, %xmm0, %eax
-; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vpextrd $1, %xmm0, (%rdi)
+; CHECK-NEXT:    vextractps $1, %xmm0, %eax
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vextractps $1, %xmm0, (%rdi)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %r1 = extractelement <16 x i32> %x, i32 1
@@ -438,9 +438,9 @@ define i32 @extract_v16i32(<16 x i32> %x
 define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
 ; CHECK-LABEL: extract_v8i32:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpextrd $1, %xmm0, %eax
-; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vpextrd $1, %xmm0, (%rdi)
+; CHECK-NEXT:    vextractps $1, %xmm0, %eax
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vextractps $1, %xmm0, (%rdi)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
   %r1 = extractelement <8 x i32> %x, i32 1
@@ -452,8 +452,8 @@ define i32 @extract_v8i32(<8 x i32> %x,
 define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
 ; CHECK-LABEL: extract_v4i32:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpextrd $1, %xmm0, %eax
-; CHECK-NEXT:    vpextrd $3, %xmm0, (%rdi)
+; CHECK-NEXT:    vextractps $1, %xmm0, %eax
+; CHECK-NEXT:    vextractps $3, %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
   %r1 = extractelement <4 x i32> %x, i32 1
   %r2 = extractelement <4 x i32> %x, i32 3

Modified: llvm/trunk/test/CodeGen/X86/extract-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extract-store.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extract-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extract-store.ll Sat Oct 21 13:19:48 2017
@@ -285,23 +285,23 @@ define void @extract_i32_3(i32* nocaptur
 ; SSE41-X32-LABEL: extract_i32_3:
 ; SSE41-X32:       # BB#0:
 ; SSE41-X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE41-X32-NEXT:    pextrd $3, %xmm0, (%eax)
+; SSE41-X32-NEXT:    extractps $3, %xmm0, (%eax)
 ; SSE41-X32-NEXT:    retl
 ;
 ; SSE41-X64-LABEL: extract_i32_3:
 ; SSE41-X64:       # BB#0:
-; SSE41-X64-NEXT:    pextrd $3, %xmm0, (%rdi)
+; SSE41-X64-NEXT:    extractps $3, %xmm0, (%rdi)
 ; SSE41-X64-NEXT:    retq
 ;
 ; AVX-X32-LABEL: extract_i32_3:
 ; AVX-X32:       # BB#0:
 ; AVX-X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX-X32-NEXT:    vpextrd $3, %xmm0, (%eax)
+; AVX-X32-NEXT:    vextractps $3, %xmm0, (%eax)
 ; AVX-X32-NEXT:    retl
 ;
 ; AVX-X64-LABEL: extract_i32_3:
 ; AVX-X64:       # BB#0:
-; AVX-X64-NEXT:    vpextrd $3, %xmm0, (%rdi)
+; AVX-X64-NEXT:    vextractps $3, %xmm0, (%rdi)
 ; AVX-X64-NEXT:    retq
 ;
 ; SSE-F128-LABEL: extract_i32_3:

Modified: llvm/trunk/test/CodeGen/X86/extractelement-index.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extractelement-index.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extractelement-index.ll (original)
+++ llvm/trunk/test/CodeGen/X86/extractelement-index.ll Sat Oct 21 13:19:48 2017
@@ -231,12 +231,12 @@ define i32 @extractelement_v4i32_3(<4 x
 ;
 ; SSE41-LABEL: extractelement_v4i32_3:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pextrd $3, %xmm0, %eax
+; SSE41-NEXT:    extractps $3, %xmm0, %eax
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: extractelement_v4i32_3:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX-NEXT:    vextractps $3, %xmm0, %eax
 ; AVX-NEXT:    retq
   %b = extractelement <4 x i32> %a, i256 3
   ret i32 %b
@@ -297,22 +297,15 @@ define i32 @extractelement_v8i32_7(<8 x
 ;
 ; SSE41-LABEL: extractelement_v8i32_7:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pextrd $3, %xmm1, %eax
+; SSE41-NEXT:    extractps $3, %xmm1, %eax
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: extractelement_v8i32_7:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: extractelement_v8i32_7:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT:    vpextrd $3, %xmm0, %eax
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; AVX-LABEL: extractelement_v8i32_7:
+; AVX:       # BB#0:
+; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT:    vextractps $3, %xmm0, %eax
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
   %b = extractelement <8 x i32> %a, i64 7
   ret i32 %b
 }

Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Sat Oct 21 13:19:48 2017
@@ -76,7 +76,7 @@ define float @signbits_ashr_extract_sito
 ; X32-LABEL: signbits_ashr_extract_sitofp:
 ; X32:       # BB#0:
 ; X32-NEXT:    pushl %eax
-; X32-NEXT:    vpextrd $1, %xmm0, %eax
+; X32-NEXT:    vextractps $1, %xmm0, %eax
 ; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
 ; X32-NEXT:    vmovss %xmm0, (%esp)
 ; X32-NEXT:    flds (%esp)

Modified: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-2.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll Sat Oct 21 13:19:48 2017
@@ -541,19 +541,19 @@ define void @test_extract_i32(<4 x i32>
 ;
 ; SSE41-LABEL: test_extract_i32:
 ; SSE41:       # BB#0:
-; SSE41-NEXT:    pextrd $1, %xmm0, %eax
+; SSE41-NEXT:    extractps $1, %xmm0, %eax
 ; SSE41-NEXT:    movntil %eax, (%rdi)
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_extract_i32:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX-NEXT:    vextractps $1, %xmm0, %eax
 ; AVX-NEXT:    movntil %eax, (%rdi)
 ; AVX-NEXT:    retq
 ;
 ; VLX-LABEL: test_extract_i32:
 ; VLX:       # BB#0:
-; VLX-NEXT:    vpextrd $1, %xmm0, %eax
+; VLX-NEXT:    vextractps $1, %xmm0, %eax
 ; VLX-NEXT:    movntil %eax, (%rdi)
 ; VLX-NEXT:    retq
   %1 = extractelement <4 x i32> %arg, i32 1

Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Sat Oct 21 13:19:48 2017
@@ -112,10 +112,10 @@ define void @v3i32(<2 x i32> %a, <2 x i3
 ;
 ; AVX2-LABEL: v3i32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
-; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
-; AVX2-NEXT:    vpextrd $2, %xmm0, 8(%rdi)
-; AVX2-NEXT:    vmovq %xmm1, (%rdi)
+; AVX2-NEXT:    vbroadcastss %xmm1, %xmm1
+; AVX2-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT:    vextractps $2, %xmm0, 8(%rdi)
+; AVX2-NEXT:    vmovlps %xmm1, (%rdi)
 ; AVX2-NEXT:    retq
 ;
 ; XOP-LABEL: v3i32:
@@ -199,18 +199,18 @@ define void @v5i32(<4 x i32> %a, <4 x i3
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX1-NEXT:    vpextrd $3, %xmm0, 16(%rdi)
+; AVX1-NEXT:    vextractps $3, %xmm0, 16(%rdi)
 ; AVX1-NEXT:    vmovaps %xmm1, (%rdi)
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: v5i32:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
-; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
-; AVX2-NEXT:    vpextrd $3, %xmm0, 16(%rdi)
-; AVX2-NEXT:    vmovdqa %xmm1, (%rdi)
+; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,5,1,6,3,u,u,u>
+; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vextractps $3, %xmm0, 16(%rdi)
+; AVX2-NEXT:    vmovaps %xmm1, (%rdi)
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -218,7 +218,7 @@ define void @v5i32(<4 x i32> %a, <4 x i3
 ; XOP:       # BB#0:
 ; XOP-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[0,1],xmm1[1,2]
 ; XOP-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; XOP-NEXT:    vpextrd $3, %xmm0, 16(%rdi)
+; XOP-NEXT:    vextractps $3, %xmm0, 16(%rdi)
 ; XOP-NEXT:    vmovaps %xmm1, (%rdi)
 ; XOP-NEXT:    retq
   %r = shufflevector <4 x i32> %a, <4 x i32> %b, <5 x i32> <i32 0, i32 5, i32 1, i32 6, i32 3>

Modified: llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll Sat Oct 21 13:19:48 2017
@@ -440,12 +440,12 @@ define i32 @test_mm_extract_epi8(<2 x i6
 define i32 @test_mm_extract_epi32(<2 x i64> %a0) {
 ; X32-LABEL: test_mm_extract_epi32:
 ; X32:       # BB#0:
-; X32-NEXT:    pextrd $1, %xmm0, %eax
+; X32-NEXT:    extractps $1, %xmm0, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_extract_epi32:
 ; X64:       # BB#0:
-; X64-NEXT:    pextrd $1, %xmm0, %eax
+; X64-NEXT:    extractps $1, %xmm0, %eax
 ; X64-NEXT:    retq
   %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
   %ext = extractelement <4 x i32> %arg0, i32 1
@@ -455,8 +455,8 @@ define i32 @test_mm_extract_epi32(<2 x i
 define i64 @test_mm_extract_epi64(<2 x i64> %a0) {
 ; X32-LABEL: test_mm_extract_epi64:
 ; X32:       # BB#0:
-; X32-NEXT:    pextrd $2, %xmm0, %eax
-; X32-NEXT:    pextrd $3, %xmm0, %edx
+; X32-NEXT:    extractps $2, %xmm0, %eax
+; X32-NEXT:    extractps $3, %xmm0, %edx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_extract_epi64:

Modified: llvm/trunk/test/CodeGen/X86/sse41-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41-schedule.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41-schedule.ll Sat Oct 21 13:19:48 2017
@@ -949,61 +949,71 @@ define i32 @test_pextrb(<16 x i8> %a0, i
 define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
 ; GENERIC-LABEL: test_pextrd:
 ; GENERIC:       # BB#0:
+; GENERIC-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
 ; GENERIC-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; GENERIC-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SLM-LABEL: test_pextrd:
 ; SLM:       # BB#0:
+; SLM-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
 ; SLM-NEXT:    pextrd $3, %xmm0, %eax # sched: [1:1.00]
 ; SLM-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
 ; SLM-NEXT:    retq # sched: [4:1.00]
 ;
 ; SANDY-LABEL: test_pextrd:
 ; SANDY:       # BB#0:
+; SANDY-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; SANDY-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; SANDY-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
 ; SANDY-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pextrd:
 ; HASWELL:       # BB#0:
+; HASWELL-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; HASWELL-NEXT:    vpextrd $3, %xmm0, %eax # sched: [2:1.00]
 ; HASWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
 ; HASWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; BROADWELL-LABEL: test_pextrd:
 ; BROADWELL:       # BB#0:
+; BROADWELL-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BROADWELL-NEXT:    vpextrd $3, %xmm0, %eax # sched: [2:1.00]
 ; BROADWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00]
 ; BROADWELL-NEXT:    retq # sched: [2:1.00]
 ;
 ; SKYLAKE-LABEL: test_pextrd:
 ; SKYLAKE:       # BB#0:
+; SKYLAKE-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKYLAKE-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; SKYLAKE-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKX-LABEL: test_pextrd:
 ; SKX:       # BB#0:
+; SKX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
 ; SKX-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
 ; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ;
 ; BTVER2-LABEL: test_pextrd:
 ; BTVER2:       # BB#0:
+; BTVER2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
 ; BTVER2-NEXT:    vpextrd $3, %xmm0, %eax # sched: [1:0.50]
 ; BTVER2-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
 ; BTVER2-NEXT:    retq # sched: [4:1.00]
 ;
 ; ZNVER1-LABEL: test_pextrd:
 ; ZNVER1:       # BB#0:
+; ZNVER1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
 ; ZNVER1-NEXT:    vpextrd $3, %xmm0, %eax # sched: [1:0.25]
 ; ZNVER1-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [8:1.00]
 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
-  %1 = extractelement <4 x i32> %a0, i32 3
-  %2 = extractelement <4 x i32> %a0, i32 1
-  store i32 %2, i32 *%a1
-  ret i32 %1
+  %1 = add <4 x i32> %a0, %a0
+  %2 = extractelement <4 x i32> %1, i32 3
+  %3 = extractelement <4 x i32> %1, i32 1
+  store i32 %3, i32 *%a1
+  ret i32 %2
 }
 
 define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {

Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse41.ll Sat Oct 21 13:19:48 2017
@@ -108,6 +108,7 @@ define float @ext_1(<4 x float> %v) noun
   %t = fadd float %s, 1.0
   ret float %t
 }
+
 define float @ext_2(<4 x float> %v) nounwind {
 ; X32-LABEL: ext_2:
 ; X32:       ## BB#0:
@@ -125,15 +126,16 @@ define float @ext_2(<4 x float> %v) noun
   %s = extractelement <4 x float> %v, i32 3
   ret float %s
 }
+
 define i32 @ext_3(<4 x i32> %v) nounwind {
 ; X32-LABEL: ext_3:
 ; X32:       ## BB#0:
-; X32-NEXT:    pextrd $3, %xmm0, %eax
+; X32-NEXT:    extractps $3, %xmm0, %eax
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: ext_3:
 ; X64:       ## BB#0:
-; X64-NEXT:    pextrd $3, %xmm0, %eax
+; X64-NEXT:    extractps $3, %xmm0, %eax
 ; X64-NEXT:    retq
   %i = extractelement <4 x i32> %v, i32 3
   ret i32 %i
@@ -261,7 +263,6 @@ define i32 @ptestz_3(<2 x i64> %t1, <2 x
   ret i32 %tmp1
 }
 
-
 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone

Modified: llvm/trunk/test/CodeGen/X86/widen_load-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widen_load-3.ll?rev=316274&r1=316273&r2=316274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widen_load-3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widen_load-3.ll Sat Oct 21 13:19:48 2017
@@ -25,33 +25,19 @@ define <7 x i64> @load7_aligned(<7 x i64
 ; X86-SSE-NEXT:    movaps %xmm0, (%eax)
 ; X86-SSE-NEXT:    retl $4
 ;
-; X86-AVX1-LABEL: load7_aligned:
-; X86-AVX1:       # BB#0:
-; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX1-NEXT:    vmovaps (%ecx), %ymm0
-; X86-AVX1-NEXT:    vmovaps 32(%ecx), %ymm1
-; X86-AVX1-NEXT:    vmovaps %ymm0, (%eax)
-; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
-; X86-AVX1-NEXT:    vpextrd $1, %xmm0, 52(%eax)
-; X86-AVX1-NEXT:    vmovd %xmm0, 48(%eax)
-; X86-AVX1-NEXT:    vmovaps %xmm1, 32(%eax)
-; X86-AVX1-NEXT:    vzeroupper
-; X86-AVX1-NEXT:    retl $4
-;
-; X86-AVX2-LABEL: load7_aligned:
-; X86-AVX2:       # BB#0:
-; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-AVX2-NEXT:    vmovaps (%ecx), %ymm0
-; X86-AVX2-NEXT:    vmovdqa 32(%ecx), %ymm1
-; X86-AVX2-NEXT:    vmovaps %ymm0, (%eax)
-; X86-AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
-; X86-AVX2-NEXT:    vpextrd $1, %xmm0, 52(%eax)
-; X86-AVX2-NEXT:    vmovd %xmm0, 48(%eax)
-; X86-AVX2-NEXT:    vmovdqa %xmm1, 32(%eax)
-; X86-AVX2-NEXT:    vzeroupper
-; X86-AVX2-NEXT:    retl $4
+; X86-AVX-LABEL: load7_aligned:
+; X86-AVX:       # BB#0:
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-AVX-NEXT:    vmovaps (%ecx), %ymm0
+; X86-AVX-NEXT:    vmovaps 32(%ecx), %ymm1
+; X86-AVX-NEXT:    vmovaps %ymm0, (%eax)
+; X86-AVX-NEXT:    vextractf128 $1, %ymm1, %xmm0
+; X86-AVX-NEXT:    vextractps $1, %xmm0, 52(%eax)
+; X86-AVX-NEXT:    vmovss %xmm0, 48(%eax)
+; X86-AVX-NEXT:    vmovaps %xmm1, 32(%eax)
+; X86-AVX-NEXT:    vzeroupper
+; X86-AVX-NEXT:    retl $4
 ;
 ; X64-SSE-LABEL: load7_aligned:
 ; X64-SSE:       # BB#0:




More information about the llvm-commits mailing list