[llvm-branch-commits] [llvm-branch] r360749 - Merging r360512:

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue May 14 21:49:12 PDT 2019


Author: tstellar
Date: Tue May 14 21:49:11 2019
New Revision: 360749

URL: http://llvm.org/viewvc/llvm-project?rev=360749&view=rev
Log:
Merging r360512:

------------------------------------------------------------------------
r360512 | ctopper | 2019-05-10 21:19:33 -0700 (Fri, 10 May 2019) | 5 lines

[X86] Don't emit MOVNTDQA loads from fast-isel without SSE4.1.

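We were checking for SSE4.1 for FP vector types, but not for 128-bit integer vector types, so fast-isel could select MOVNTDQA (an SSE4.1 instruction) for non-temporal integer loads on targets without SSE4.1.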

Fixes PR41837.
------------------------------------------------------------------------

Modified:
    llvm/branches/release_80/lib/Target/X86/X86FastISel.cpp
    llvm/branches/release_80/test/CodeGen/X86/fast-isel-nontemporal.ll

Modified: llvm/branches/release_80/lib/Target/X86/X86FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_80/lib/Target/X86/X86FastISel.cpp?rev=360749&r1=360748&r2=360749&view=diff
==============================================================================
--- llvm/branches/release_80/lib/Target/X86/X86FastISel.cpp (original)
+++ llvm/branches/release_80/lib/Target/X86/X86FastISel.cpp Tue May 14 21:49:11 2019
@@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT
   case MVT::v2i64:
   case MVT::v8i16:
   case MVT::v16i8:
-    if (IsNonTemporal && Alignment >= 16)
+    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
       Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
             HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
     else if (Alignment >= 16)

Modified: llvm/branches/release_80/test/CodeGen/X86/fast-isel-nontemporal.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_80/test/CodeGen/X86/fast-isel-nontemporal.ll?rev=360749&r1=360748&r2=360749&view=diff
==============================================================================
--- llvm/branches/release_80/test/CodeGen/X86/fast-isel-nontemporal.ll (original)
+++ llvm/branches/release_80/test/CodeGen/X86/fast-isel-nontemporal.ll Tue May 14 21:49:11 2019
@@ -300,10 +300,20 @@ entry:
 }
 
 define <16 x i8> @test_load_nt16xi8(<16 x i8>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt16xi8:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt16xi8:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt16xi8:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt16xi8:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt16xi8:
 ; AVX:       # %bb.0: # %entry
@@ -320,10 +330,20 @@ entry:
 }
 
 define <8 x i16> @test_load_nt8xi16(<8 x i16>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt8xi16:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt8xi16:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt8xi16:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt8xi16:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt8xi16:
 ; AVX:       # %bb.0: # %entry
@@ -340,10 +360,20 @@ entry:
 }
 
 define <4 x i32> @test_load_nt4xi32(<4 x i32>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt4xi32:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt4xi32:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt4xi32:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt4xi32:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt4xi32:
 ; AVX:       # %bb.0: # %entry
@@ -360,10 +390,20 @@ entry:
 }
 
 define <2 x i64> @test_load_nt2xi64(<2 x i64>* nocapture %ptr) {
-; SSE-LABEL: test_load_nt2xi64:
-; SSE:       # %bb.0: # %entry
-; SSE-NEXT:    movntdqa (%rdi), %xmm0
-; SSE-NEXT:    retq
+; SSE2-LABEL: test_load_nt2xi64:
+; SSE2:       # %bb.0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE4A-LABEL: test_load_nt2xi64:
+; SSE4A:       # %bb.0: # %entry
+; SSE4A-NEXT:    movdqa (%rdi), %xmm0
+; SSE4A-NEXT:    retq
+;
+; SSE41-LABEL: test_load_nt2xi64:
+; SSE41:       # %bb.0: # %entry
+; SSE41-NEXT:    movntdqa (%rdi), %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: test_load_nt2xi64:
 ; AVX:       # %bb.0: # %entry




More information about the llvm-branch-commits mailing list