[llvm] r304798 - [X86][AVX1] Split 256-bit vector non-temporal FastISel loads to keep them non-temporal (PR32744)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 6 07:18:39 PDT 2017


Author: rksimon
Date: Tue Jun  6 09:18:39 2017
New Revision: 304798

URL: http://llvm.org/viewvc/llvm-project?rev=304798&view=rev
Log:
[X86][AVX1] Split 256-bit vector non-temporal FastISel loads to keep them non-temporal (PR32744)

Extension to D33728

Modified:
    llvm/trunk/lib/Target/X86/X86FastISel.cpp
    llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll

Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=304798&r1=304797&r2=304798&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Tue Jun  6 09:18:39 2017
@@ -414,6 +414,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
     else
@@ -424,6 +426,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
     else
@@ -437,6 +441,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (IsNonTemporal && Alignment >= 16)
+      return false; // Force split for X86::VMOVNTDQArm
     else if (Alignment >= 32)
       Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
     else

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll?rev=304798&r1=304797&r2=304798&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-nontemporal.ll Tue Jun  6 09:18:39 2017
@@ -545,7 +545,11 @@ define <8 x float> @test_load_nt8xfloat(
 ;
 ; AVX1-LABEL: test_load_nt8xfloat:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovaps (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt8xfloat:
@@ -583,7 +587,11 @@ define <4 x double> @test_load_nt4xdoubl
 ;
 ; AVX1-LABEL: test_load_nt4xdouble:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovapd (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt4xdouble:
@@ -621,7 +629,11 @@ define <32 x i8> @test_load_nt32xi8(<32
 ;
 ; AVX1-LABEL: test_load_nt32xi8:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt32xi8:
@@ -659,7 +671,11 @@ define <16 x i16> @test_load_nt16xi16(<1
 ;
 ; AVX1-LABEL: test_load_nt16xi16:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt16xi16:
@@ -697,7 +713,11 @@ define <8 x i32> @test_load_nt8xi32(<8 x
 ;
 ; AVX1-LABEL: test_load_nt8xi32:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt8xi32:
@@ -735,7 +755,11 @@ define <4 x i64> @test_load_nt4xi64(<4 x
 ;
 ; AVX1-LABEL: test_load_nt4xi64:
 ; AVX1:       # BB#0: # %entry
-; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX1-NEXT:    # implicit-def: %YMM1
+; AVX1-NEXT:    vmovaps %xmm0, %xmm1
+; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_load_nt4xi64:




More information about the llvm-commits mailing list