[llvm] r272651 - [X86][SSE4A] Added patterns for nontemporal stores of scalar floats/doubles using MOVNTSS/MOVNTSD

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 14 02:43:38 PDT 2016


Author: rksimon
Date: Tue Jun 14 04:43:38 2016
New Revision: 272651

URL: http://llvm.org/viewvc/llvm-project?rev=272651&view=rev
Log:
[X86][SSE4A] Added patterns for nontemporal stores of scalar floats/doubles using MOVNTSS/MOVNTSD

Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/nontemporal-2.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=272651&r1=272650&r2=272651&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jun 14 04:43:38 2016
@@ -7774,6 +7774,8 @@ def INSERTQ  : I<0x79, MRMSrcReg, (outs
                                     VR128:$mask))]>, XD;
 }
 
+// Non-temporal (unaligned) scalar stores.
+let AddedComplexity = 400 in { // Prefer non-temporal versions
 def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                 "movntss\t{$src, $dst|$dst, $src}",
                 [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;
@@ -7781,7 +7783,15 @@ def MOVNTSS : I<0x2B, MRMDestMem, (outs)
 def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                 "movntsd\t{$src, $dst|$dst, $src}",
                 [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD;
-}
+
+def : Pat<(nontemporalstore FR32:$src, addr:$dst),
+          (MOVNTSS addr:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
+def : Pat<(nontemporalstore FR64:$src, addr:$dst),
+          (MOVNTSD addr:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
+
+} // AddedComplexity
+} // HasSSE4A
 
 //===----------------------------------------------------------------------===//
 // AVX Instructions
@@ -8364,7 +8374,7 @@ let Predicates = [HasAVX2, NoVLX] in {
               (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
     def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
               (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
-    }  
+    }
 }
 
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI], AddedComplexity = 20 in {

Modified: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-2.ll?rev=272651&r1=272650&r2=272651&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll Tue Jun 14 04:43:38 2016
@@ -386,10 +386,20 @@ define void @test_zero_v32i8(<32 x i8>*
 ; Scalar versions.
 
 define void @test_arg_f32(float %arg, float* %dst) {
-; SSE-LABEL: test_arg_f32:
-; SSE:       # BB#0:
-; SSE-NEXT:    movss %xmm0, (%rdi)
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_f32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_arg_f32:
+; SSE4A:       # BB#0:
+; SSE4A-NEXT:    movntss %xmm0, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_f32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movss %xmm0, (%rdi)
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_f32:
 ; AVX:       # BB#0:
@@ -424,10 +434,20 @@ define void @test_arg_i32(i32 %arg, i32*
 }
 
 define void @test_arg_f64(double %arg, double* %dst) {
-; SSE-LABEL: test_arg_f64:
-; SSE:       # BB#0:
-; SSE-NEXT:    movsd %xmm0, (%rdi)
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_arg_f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_arg_f64:
+; SSE4A:       # BB#0:
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_arg_f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movsd %xmm0, (%rdi)
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_arg_f64:
 ; AVX:       # BB#0:
@@ -473,7 +493,7 @@ define void @test_extract_f32(<4 x float
 ; SSE4A-LABEL: test_extract_f32:
 ; SSE4A:       # BB#0:
 ; SSE4A-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; SSE4A-NEXT:    movss %xmm0, (%rdi)
+; SSE4A-NEXT:    movntss %xmm0, (%rdi)
 ; SSE4A-NEXT:    retq
 ;
 ; SSE41-LABEL: test_extract_f32:
@@ -536,10 +556,21 @@ define void @test_extract_i32(<4 x i32>
 }
 
 define void @test_extract_f64(<2 x double> %arg, double* %dst) {
-; SSE-LABEL: test_extract_f64:
-; SSE:       # BB#0:
-; SSE-NEXT:    movhpd %xmm0, (%rdi)
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_extract_f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movhpd %xmm0, (%rdi)
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_extract_f64:
+; SSE4A:       # BB#0:
+; SSE4A-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE4A-NEXT:    movntsd %xmm0, (%rdi)
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_extract_f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movhpd %xmm0, (%rdi)
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_extract_f64:
 ; AVX:       # BB#0:




More information about the llvm-commits mailing list