[llvm] r278316 - [AVX-512] Fix the 128-bit and 256-bit nontemporal load patterns with element types other than i64. These loads have all been promoted to v2i64/v4i64 loads, so we need bitcasts; otherwise we end up selecting VMOVDQA32/VMOVDQU32 instead.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 10 23:04:00 PDT 2016


Author: ctopper
Date: Thu Aug 11 01:04:00 2016
New Revision: 278316

URL: http://llvm.org/viewvc/llvm-project?rev=278316&view=rev
Log:
[AVX-512] Fix the 128-bit and 256-bit nontemporal load patterns with element types other than i64. These loads have all been promoted to v2i64/v4i64 loads, so we need bitcasts; otherwise we end up selecting VMOVDQA32/VMOVDQU32 instead.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=278316&r1=278315&r2=278316&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Aug 11 01:04:00 2016
@@ -3443,11 +3443,11 @@ let Predicates = [HasVLX], AddedComplexi
             (VMOVNTDQAZ256rm addr:$src)>;
   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
             (VMOVNTDQAZ256rm addr:$src)>;
-  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+  def : Pat<(v8i32 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ256rm addr:$src)>;
-  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+  def : Pat<(v16i16 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ256rm addr:$src)>;
-  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+  def : Pat<(v32i8 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ256rm addr:$src)>;
 
   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
@@ -3463,11 +3463,11 @@ let Predicates = [HasVLX], AddedComplexi
             (VMOVNTDQAZ128rm addr:$src)>;
   def : Pat<(v2i64 (alignednontemporalload addr:$src)),
             (VMOVNTDQAZ128rm addr:$src)>;
-  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+  def : Pat<(v4i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ128rm addr:$src)>;
-  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+  def : Pat<(v8i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ128rm addr:$src)>;
-  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+  def : Pat<(v16i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ128rm addr:$src)>;
 }
 

Modified: llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll?rev=278316&r1=278315&r2=278316&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll Thu Aug 11 01:04:00 2016
@@ -47,20 +47,10 @@ define <4 x i32> @test_v4i32(<4 x i32>*
 ; AVX-NEXT:    vmovntdqa (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: test_v4i32:
-; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vmovntdqa (%rdi), %xmm0
-; AVX512F-NEXT:    retq
-;
-; AVX512BW-LABEL: test_v4i32:
-; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vmovntdqa (%rdi), %xmm0
-; AVX512BW-NEXT:    retq
-;
-; AVX512VL-LABEL: test_v4i32:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vmovaps (%rdi), %xmm0
-; AVX512VL-NEXT:    retq
+; AVX512-LABEL: test_v4i32:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovntdqa (%rdi), %xmm0
+; AVX512-NEXT:    retq
   %1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
   ret <4 x i32> %1
 }




More information about the llvm-commits mailing list