[llvm] r308702 - [AVX-512] Fix a bug that prevented some non-temporal loads from using the movntdqa instruction.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 20 17:40:42 PDT 2017


Author: ctopper
Date: Thu Jul 20 17:40:42 2017
New Revision: 308702

URL: http://llvm.org/viewvc/llvm-project?rev=308702&view=rev
Log:
[AVX-512] Fix a bug that prevented some non-temporal loads from using the movntdqa instruction.

The bitconverts in these 256-bit patterns had a 128-bit input type (v2i64) but a 256-bit output type, so the patterns could never match. The input type should also have been 256 bits (v4i64).

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=308702&r1=308701&r2=308702&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Jul 20 17:40:42 2017
@@ -4328,11 +4328,11 @@ let Predicates = [HasVLX], AddedComplexi
             (VMOVNTDQAZ256rm addr:$src)>;
   def : Pat<(v4i64 (alignednontemporalload addr:$src)),
             (VMOVNTDQAZ256rm addr:$src)>;
-  def : Pat<(v8i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
+  def : Pat<(v8i32 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ256rm addr:$src)>;
-  def : Pat<(v16i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
+  def : Pat<(v16i16 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ256rm addr:$src)>;
-  def : Pat<(v32i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
+  def : Pat<(v32i8 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
             (VMOVNTDQAZ256rm addr:$src)>;
 
   def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),

Modified: llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll?rev=308702&r1=308701&r2=308702&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-loads.ll Thu Jul 20 17:40:42 2017
@@ -211,20 +211,10 @@ define <8 x i32> @test_v8i32(<8 x i32>*
 ; AVX2-NEXT:    vmovntdqa (%rdi), %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: test_v8i32:
-; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm0
-; AVX512F-NEXT:    retq
-;
-; AVX512BW-LABEL: test_v8i32:
-; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vmovntdqa (%rdi), %ymm0
-; AVX512BW-NEXT:    retq
-;
-; AVX512VL-LABEL: test_v8i32:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vmovaps (%rdi), %ymm0
-; AVX512VL-NEXT:    retq
+; AVX512-LABEL: test_v8i32:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovntdqa (%rdi), %ymm0
+; AVX512-NEXT:    retq
   %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
   ret <8 x i32> %1
 }
@@ -876,22 +866,11 @@ define <8 x i32> @test_arg_v8i32(<8 x i3
 ; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: test_arg_v8i32:
-; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vmovntdqa (%rdi), %ymm1
-; AVX512F-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT:    retq
-;
-; AVX512BW-LABEL: test_arg_v8i32:
-; AVX512BW:       # BB#0:
-; AVX512BW-NEXT:    vmovntdqa (%rdi), %ymm1
-; AVX512BW-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT:    retq
-;
-; AVX512VL-LABEL: test_arg_v8i32:
-; AVX512VL:       # BB#0:
-; AVX512VL-NEXT:    vpaddd (%rdi), %ymm0, %ymm0
-; AVX512VL-NEXT:    retq
+; AVX512-LABEL: test_arg_v8i32:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vmovntdqa (%rdi), %ymm1
+; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX512-NEXT:    retq
   %1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
   %2 = add <8 x i32> %arg, %1
   ret <8 x i32> %2




More information about the llvm-commits mailing list