[llvm] r220054 - [X86] Fix missed selection of non-temporal store of zero vector.

Andrea Di Biagio Andrea_DiBiagio at sn.scee.net
Fri Oct 17 10:27:06 PDT 2014


Author: adibiagio
Date: Fri Oct 17 12:27:06 2014
New Revision: 220054

URL: http://llvm.org/viewvc/llvm-project?rev=220054&view=rev
Log:
[X86] Fix missed selection of non-temporal store of zero vector.

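When the value being stored was a zero vector, the backend always
selected a normal (temporal) vector store, ignoring the non-temporal
hint on the store instruction. This is fixed by this patch, which adds
patterns that select (V)MOVNTPS for an aligned non-temporal store of a
v4i32 value.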

This fixes PR19370.

Added:
    llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=220054&r1=220053&r2=220054&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Oct 17 12:27:06 2014
@@ -3939,6 +3939,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (
                   PS, Requires<[HasSSE2]>;
 } // SchedRW = [WriteStore]
 
+let Predicates = [HasAVX, NoVLX] in {
+  def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+            (VMOVNTPSmr addr:$dst, VR128:$src)>;
+}
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+          (MOVNTPSmr addr:$dst, VR128:$src)>;
+
 } // AddedComplexity
 
 //===----------------------------------------------------------------------===//

Added: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-2.ll?rev=220054&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll (added)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll Fri Oct 17 12:27:06 2014
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+; Make sure that non-temporal stores of a zero vector are selected as non-temporal stores (PR19370).
+
+define void @test1(<4 x float>* %dst) {
+; CHECK-LABEL: test1:
+; SSE: movntps
+; AVX: vmovntps
+  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test2(<4 x i32>* %dst) {
+; CHECK-LABEL: test2:
+; SSE: movntps
+; AVX: vmovntps
+  store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test3(<2 x double>* %dst) {
+; CHECK-LABEL: test3:
+; SSE: movntps
+; AVX: vmovntps
+  store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+!1 = metadata !{i32 1}





More information about the llvm-commits mailing list