[llvm] r177130 - Unaligned loads should use the VMOVUPS opcode.

Nadav Rotem nrotem at apple.com
Thu Mar 14 16:49:44 PDT 2013


Author: nadav
Date: Thu Mar 14 18:49:44 2013
New Revision: 177130

URL: http://llvm.org/viewvc/llvm-project?rev=177130&view=rev
Log:
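Unaligned stores should use the VMOVUPS opcode: the store pattern for the low
v8i16 half of a v16i16 register selected VMOVAPSmr, which requires an aligned
address.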

Added:
    llvm/trunk/test/CodeGen/X86/vec_align_i256.ll
Modified:
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx-load-store.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=177130&r1=177129&r2=177130&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Mar 14 18:49:44 2013
@@ -1009,7 +1009,7 @@ let Predicates = [HasAVX] in {
             (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
   def : Pat<(store (v8i16 (extract_subvector
                            (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
-            (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
+            (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
   def : Pat<(store (v16i8 (extract_subvector
                            (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
             (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;

Modified: llvm/trunk/test/CodeGen/X86/avx-load-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-load-store.ll?rev=177130&r1=177129&r2=177130&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-load-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-load-store.ll Thu Mar 14 18:49:44 2013
@@ -55,7 +55,7 @@ define void @storev16i16(<16 x i16> %a)
 
 ; CHECK: storev16i16_01
 ; CHECK: vextractf128
-; CHECK: vmovaps  %xmm
+; CHECK: vmovups  %xmm
 define void @storev16i16_01(<16 x i16> %a) nounwind {
   store <16 x i16> %a, <16 x i16>* undef, align 4
   unreachable

Added: llvm/trunk/test/CodeGen/X86/vec_align_i256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_align_i256.ll?rev=177130&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_align_i256.ll (added)
+++ llvm/trunk/test/CodeGen/X86/vec_align_i256.ll Thu Mar 14 18:49:44 2013
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mcpu=corei7-avx | FileCheck %s 
+
+; Make sure that we do not generate a movaps for this store, because it is only aligned to 1 byte.
+;CHECK: @foo
+;CHECK: xor
+;CHECK-NEXT: vmovups
+;CHECK-NEXT: ret
+define void @foo() {
+  store <16 x i16> zeroinitializer, <16 x i16>* undef, align 1
+  ret void
+}





More information about the llvm-commits mailing list